sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E, Lit 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18 T = t.TypeVar("T") 19 20logger = logging.getLogger("sqlglot") 21 22OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 23 24 25def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 26 if len(args) == 1 and args[0].is_star: 27 return exp.StarMap(this=args[0]) 28 29 keys = [] 30 values = [] 31 for i in range(0, len(args), 2): 32 keys.append(args[i]) 33 values.append(args[i + 1]) 34 35 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 36 37 38def build_like(args: t.List) -> exp.Escape | exp.Like: 39 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 40 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 41 42 43def binary_range_parser( 44 expr_type: t.Type[exp.Expression], reverse_args: bool = False 45) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 46 def _parse_binary_range( 47 self: Parser, this: t.Optional[exp.Expression] 48 ) -> t.Optional[exp.Expression]: 49 expression = self._parse_bitwise() 50 if reverse_args: 51 this, expression = expression, this 52 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 53 54 return _parse_binary_range 55 56 57def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 58 # Default argument order is base, expression 59 this = seq_get(args, 0) 60 expression = seq_get(args, 1) 61 62 if expression: 63 if not dialect.LOG_BASE_FIRST: 64 this, expression = expression, this 65 return exp.Log(this=this, expression=expression) 66 67 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 68 69 70def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex: 71 arg = seq_get(args, 0) 72 return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg) 73 74 75def build_lower(args: t.List) -> exp.Lower | exp.Hex: 76 # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation 77 arg = seq_get(args, 0) 78 return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg) 79 80 81def build_upper(args: t.List) -> exp.Upper | exp.Hex: 82 # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation 83 arg = seq_get(args, 0) 84 return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg) 85 86 87def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 88 def _builder(args: t.List, dialect: Dialect) -> E: 89 expression = expr_type( 90 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 91 ) 92 if len(args) > 2 and expr_type is exp.JSONExtract: 93 expression.set("expressions", args[2:]) 94 95 return expression 96 97 return _builder 98 99 100def build_mod(args: t.List) -> exp.Mod: 101 this = seq_get(args, 0) 102 expression = seq_get(args, 1) 103 104 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 105 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 106 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 107 108 return exp.Mod(this=this, expression=expression) 109 110 111class _Parser(type): 112 def __new__(cls, clsname, bases, attrs): 113 klass = super().__new__(cls, clsname, bases, attrs) 114 115 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 116 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 117 118 return klass 119 120 121class Parser(metaclass=_Parser): 122 """ 123 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 124 125 Args: 126 error_level: The desired error level. 127 Default: ErrorLevel.IMMEDIATE 128 error_message_context: The amount of context to capture from a query string when displaying 129 the error message (in number of characters). 130 Default: 100 131 max_errors: Maximum number of error messages to include in a raised ParseError. 132 This is only relevant if error_level is ErrorLevel.RAISE. 133 Default: 3 134 """ 135 136 FUNCTIONS: t.Dict[str, t.Callable] = { 137 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 138 "CONCAT": lambda args, dialect: exp.Concat( 139 expressions=args, 140 safe=not dialect.STRICT_STRING_CONCAT, 141 coalesce=dialect.CONCAT_COALESCE, 142 ), 143 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 144 expressions=args, 145 safe=not dialect.STRICT_STRING_CONCAT, 146 coalesce=dialect.CONCAT_COALESCE, 147 ), 148 "DATE_TO_DATE_STR": lambda args: exp.Cast( 149 this=seq_get(args, 0), 150 to=exp.DataType(this=exp.DataType.Type.TEXT), 151 ), 152 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 153 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 154 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 155 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 156 "LIKE": build_like, 157 "LOG": build_logarithm, 158 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 159 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 160 "MOD": build_mod, 161 "TIME_TO_TIME_STR": lambda args: exp.Cast( 162 this=seq_get(args, 0), 163 to=exp.DataType(this=exp.DataType.Type.TEXT), 164 ), 165 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 166 this=exp.Cast( 167 this=seq_get(args, 0), 168 to=exp.DataType(this=exp.DataType.Type.TEXT), 169 ), 170 start=exp.Literal.number(1), 171 length=exp.Literal.number(10), 172 ), 173 "VAR_MAP": build_var_map, 174 "LOWER": build_lower, 175 "UPPER": build_upper, 176 "HEX": build_hex, 177 "TO_HEX": build_hex, 178 } 179 180 NO_PAREN_FUNCTIONS = { 181 TokenType.CURRENT_DATE: exp.CurrentDate, 182 TokenType.CURRENT_DATETIME: exp.CurrentDate, 183 TokenType.CURRENT_TIME: exp.CurrentTime, 184 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 185 TokenType.CURRENT_USER: exp.CurrentUser, 186 } 187 188 STRUCT_TYPE_TOKENS = { 189 TokenType.NESTED, 190 TokenType.OBJECT, 191 TokenType.STRUCT, 192 } 193 194 NESTED_TYPE_TOKENS = { 195 TokenType.ARRAY, 196 TokenType.LOWCARDINALITY, 197 TokenType.MAP, 198 TokenType.NULLABLE, 199 *STRUCT_TYPE_TOKENS, 200 } 201 202 ENUM_TYPE_TOKENS = { 203 TokenType.ENUM, 204 TokenType.ENUM8, 205 TokenType.ENUM16, 206 } 207 208 AGGREGATE_TYPE_TOKENS = { 209 TokenType.AGGREGATEFUNCTION, 210 TokenType.SIMPLEAGGREGATEFUNCTION, 211 } 212 213 TYPE_TOKENS = { 214 TokenType.BIT, 215 TokenType.BOOLEAN, 216 TokenType.TINYINT, 217 TokenType.UTINYINT, 218 TokenType.SMALLINT, 219 TokenType.USMALLINT, 220 TokenType.INT, 221 TokenType.UINT, 222 TokenType.BIGINT, 223 TokenType.UBIGINT, 224 TokenType.INT128, 225 TokenType.UINT128, 226 TokenType.INT256, 227 TokenType.UINT256, 228 TokenType.MEDIUMINT, 229 TokenType.UMEDIUMINT, 230 TokenType.FIXEDSTRING, 231 TokenType.FLOAT, 232 TokenType.DOUBLE, 233 TokenType.CHAR, 234 TokenType.NCHAR, 235 TokenType.VARCHAR, 236 TokenType.NVARCHAR, 237 TokenType.BPCHAR, 238 TokenType.TEXT, 239 TokenType.MEDIUMTEXT, 240 TokenType.LONGTEXT, 241 TokenType.MEDIUMBLOB, 242 TokenType.LONGBLOB, 243 TokenType.BINARY, 244 TokenType.VARBINARY, 245 TokenType.JSON, 246 TokenType.JSONB, 247 TokenType.INTERVAL, 248 TokenType.TINYBLOB, 249 TokenType.TINYTEXT, 250 TokenType.TIME, 251 TokenType.TIMETZ, 252 TokenType.TIMESTAMP, 253 TokenType.TIMESTAMP_S, 254 TokenType.TIMESTAMP_MS, 255 TokenType.TIMESTAMP_NS, 256 TokenType.TIMESTAMPTZ, 257 TokenType.TIMESTAMPLTZ, 258 TokenType.TIMESTAMPNTZ, 259 TokenType.DATETIME, 260 TokenType.DATETIME64, 261 TokenType.DATE, 262 TokenType.DATE32, 263 TokenType.INT4RANGE, 264 TokenType.INT4MULTIRANGE, 265 TokenType.INT8RANGE, 266 TokenType.INT8MULTIRANGE, 267 TokenType.NUMRANGE, 268 TokenType.NUMMULTIRANGE, 269 TokenType.TSRANGE, 270 TokenType.TSMULTIRANGE, 271 TokenType.TSTZRANGE, 272 TokenType.TSTZMULTIRANGE, 273 TokenType.DATERANGE, 274 TokenType.DATEMULTIRANGE, 275 TokenType.DECIMAL, 276 TokenType.UDECIMAL, 277 TokenType.BIGDECIMAL, 278 TokenType.UUID, 279 TokenType.GEOGRAPHY, 280 TokenType.GEOMETRY, 281 TokenType.HLLSKETCH, 282 TokenType.HSTORE, 283 TokenType.PSEUDO_TYPE, 284 TokenType.SUPER, 285 TokenType.SERIAL, 286 TokenType.SMALLSERIAL, 287 TokenType.BIGSERIAL, 288 TokenType.XML, 289 TokenType.YEAR, 290 TokenType.UNIQUEIDENTIFIER, 291 TokenType.USERDEFINED, 292 TokenType.MONEY, 293 TokenType.SMALLMONEY, 294 TokenType.ROWVERSION, 295 TokenType.IMAGE, 296 TokenType.VARIANT, 297 TokenType.OBJECT, 298 TokenType.OBJECT_IDENTIFIER, 299 TokenType.INET, 300 TokenType.IPADDRESS, 301 TokenType.IPPREFIX, 302 TokenType.IPV4, 303 TokenType.IPV6, 304 TokenType.UNKNOWN, 305 TokenType.NULL, 306 TokenType.NAME, 307 TokenType.TDIGEST, 308 *ENUM_TYPE_TOKENS, 309 *NESTED_TYPE_TOKENS, 310 *AGGREGATE_TYPE_TOKENS, 311 } 312 313 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 314 TokenType.BIGINT: TokenType.UBIGINT, 315 TokenType.INT: TokenType.UINT, 316 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 317 TokenType.SMALLINT: TokenType.USMALLINT, 318 TokenType.TINYINT: TokenType.UTINYINT, 319 TokenType.DECIMAL: TokenType.UDECIMAL, 320 } 321 322 SUBQUERY_PREDICATES = { 323 TokenType.ANY: exp.Any, 324 TokenType.ALL: exp.All, 325 TokenType.EXISTS: exp.Exists, 326 TokenType.SOME: exp.Any, 327 } 328 329 RESERVED_TOKENS = { 330 *Tokenizer.SINGLE_TOKENS.values(), 331 TokenType.SELECT, 332 } - {TokenType.IDENTIFIER} 333 334 DB_CREATABLES = { 335 TokenType.DATABASE, 336 TokenType.DICTIONARY, 337 TokenType.MODEL, 338 TokenType.SCHEMA, 339 TokenType.SEQUENCE, 340 TokenType.STORAGE_INTEGRATION, 341 TokenType.TABLE, 342 TokenType.TAG, 343 TokenType.VIEW, 344 TokenType.WAREHOUSE, 345 TokenType.STREAMLIT, 346 } 347 348 CREATABLES = { 349 TokenType.COLUMN, 350 TokenType.CONSTRAINT, 351 TokenType.FOREIGN_KEY, 352 TokenType.FUNCTION, 353 TokenType.INDEX, 354 TokenType.PROCEDURE, 355 *DB_CREATABLES, 356 } 357 358 # Tokens that can represent identifiers 359 ID_VAR_TOKENS = { 360 TokenType.VAR, 361 TokenType.ANTI, 362 TokenType.APPLY, 363 TokenType.ASC, 364 TokenType.ASOF, 365 TokenType.AUTO_INCREMENT, 366 TokenType.BEGIN, 367 TokenType.BPCHAR, 368 TokenType.CACHE, 369 TokenType.CASE, 370 TokenType.COLLATE, 371 TokenType.COMMAND, 372 TokenType.COMMENT, 373 TokenType.COMMIT, 374 TokenType.CONSTRAINT, 375 TokenType.COPY, 376 TokenType.DEFAULT, 377 TokenType.DELETE, 378 TokenType.DESC, 379 TokenType.DESCRIBE, 380 TokenType.DICTIONARY, 381 TokenType.DIV, 382 TokenType.END, 383 TokenType.EXECUTE, 384 TokenType.ESCAPE, 385 TokenType.FALSE, 386 TokenType.FIRST, 387 TokenType.FILTER, 388 TokenType.FINAL, 389 TokenType.FORMAT, 390 TokenType.FULL, 391 TokenType.IDENTIFIER, 392 TokenType.IS, 393 TokenType.ISNULL, 394 TokenType.INTERVAL, 395 TokenType.KEEP, 396 TokenType.KILL, 397 TokenType.LEFT, 398 TokenType.LOAD, 399 TokenType.MERGE, 400 TokenType.NATURAL, 401 TokenType.NEXT, 402 TokenType.OFFSET, 403 TokenType.OPERATOR, 404 TokenType.ORDINALITY, 405 TokenType.OVERLAPS, 406 TokenType.OVERWRITE, 407 TokenType.PARTITION, 408 TokenType.PERCENT, 409 TokenType.PIVOT, 410 TokenType.PRAGMA, 411 TokenType.RANGE, 412 TokenType.RECURSIVE, 413 TokenType.REFERENCES, 414 TokenType.REFRESH, 415 TokenType.REPLACE, 416 TokenType.RIGHT, 417 TokenType.ROLLUP, 418 TokenType.ROW, 419 TokenType.ROWS, 420 TokenType.SEMI, 421 TokenType.SET, 422 TokenType.SETTINGS, 423 TokenType.SHOW, 424 TokenType.TEMPORARY, 425 TokenType.TOP, 426 TokenType.TRUE, 427 TokenType.TRUNCATE, 428 TokenType.UNIQUE, 429 TokenType.UNNEST, 430 TokenType.UNPIVOT, 431 TokenType.UPDATE, 432 TokenType.USE, 433 TokenType.VOLATILE, 434 TokenType.WINDOW, 435 *CREATABLES, 436 *SUBQUERY_PREDICATES, 437 *TYPE_TOKENS, 438 *NO_PAREN_FUNCTIONS, 439 } 440 441 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 442 443 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 444 TokenType.ANTI, 445 TokenType.APPLY, 446 TokenType.ASOF, 447 TokenType.FULL, 448 TokenType.LEFT, 449 TokenType.LOCK, 450 TokenType.NATURAL, 451 TokenType.OFFSET, 452 TokenType.RIGHT, 453 TokenType.SEMI, 454 TokenType.WINDOW, 455 } 456 457 ALIAS_TOKENS = ID_VAR_TOKENS 458 459 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 460 461 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 462 463 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 464 465 FUNC_TOKENS = { 466 TokenType.COLLATE, 467 TokenType.COMMAND, 468 TokenType.CURRENT_DATE, 469 TokenType.CURRENT_DATETIME, 470 TokenType.CURRENT_TIMESTAMP, 471 TokenType.CURRENT_TIME, 472 TokenType.CURRENT_USER, 473 TokenType.FILTER, 474 TokenType.FIRST, 475 TokenType.FORMAT, 476 TokenType.GLOB, 477 TokenType.IDENTIFIER, 478 TokenType.INDEX, 479 TokenType.ISNULL, 480 TokenType.ILIKE, 481 TokenType.INSERT, 482 TokenType.LIKE, 483 TokenType.MERGE, 484 TokenType.OFFSET, 485 TokenType.PRIMARY_KEY, 486 TokenType.RANGE, 487 TokenType.REPLACE, 488 TokenType.RLIKE, 489 TokenType.ROW, 490 TokenType.UNNEST, 491 TokenType.VAR, 492 TokenType.LEFT, 493 TokenType.RIGHT, 494 TokenType.SEQUENCE, 495 TokenType.DATE, 496 TokenType.DATETIME, 497 TokenType.TABLE, 498 TokenType.TIMESTAMP, 499 TokenType.TIMESTAMPTZ, 500 TokenType.TRUNCATE, 501 TokenType.WINDOW, 502 TokenType.XOR, 503 *TYPE_TOKENS, 504 *SUBQUERY_PREDICATES, 505 } 506 507 CONJUNCTION = { 508 TokenType.AND: exp.And, 509 TokenType.OR: exp.Or, 510 } 511 512 EQUALITY = { 513 TokenType.EQ: exp.EQ, 514 TokenType.NEQ: exp.NEQ, 515 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 516 } 517 518 COMPARISON = { 519 TokenType.GT: exp.GT, 520 TokenType.GTE: exp.GTE, 521 TokenType.LT: exp.LT, 522 TokenType.LTE: exp.LTE, 523 } 524 525 BITWISE = { 526 TokenType.AMP: exp.BitwiseAnd, 527 TokenType.CARET: exp.BitwiseXor, 528 TokenType.PIPE: exp.BitwiseOr, 529 } 530 531 TERM = { 532 TokenType.DASH: exp.Sub, 533 TokenType.PLUS: exp.Add, 534 TokenType.MOD: exp.Mod, 535 TokenType.COLLATE: exp.Collate, 536 } 537 538 FACTOR = { 539 TokenType.DIV: exp.IntDiv, 540 TokenType.LR_ARROW: exp.Distance, 541 TokenType.SLASH: exp.Div, 542 TokenType.STAR: exp.Mul, 543 } 544 545 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 546 547 TIMES = { 548 TokenType.TIME, 549 TokenType.TIMETZ, 550 } 551 552 TIMESTAMPS = { 553 TokenType.TIMESTAMP, 554 TokenType.TIMESTAMPTZ, 555 TokenType.TIMESTAMPLTZ, 556 *TIMES, 557 } 558 559 SET_OPERATIONS = { 560 TokenType.UNION, 561 TokenType.INTERSECT, 562 TokenType.EXCEPT, 563 } 564 565 JOIN_METHODS = { 566 TokenType.ASOF, 567 TokenType.NATURAL, 568 TokenType.POSITIONAL, 569 } 570 571 JOIN_SIDES = { 572 TokenType.LEFT, 573 TokenType.RIGHT, 574 TokenType.FULL, 575 } 576 577 JOIN_KINDS = { 578 TokenType.INNER, 579 TokenType.OUTER, 580 TokenType.CROSS, 581 TokenType.SEMI, 582 TokenType.ANTI, 583 } 584 585 JOIN_HINTS: t.Set[str] = set() 586 587 LAMBDAS = { 588 TokenType.ARROW: lambda self, expressions: self.expression( 589 exp.Lambda, 590 this=self._replace_lambda( 591 self._parse_conjunction(), 592 expressions, 593 ), 594 expressions=expressions, 595 ), 596 TokenType.FARROW: lambda self, expressions: self.expression( 597 exp.Kwarg, 598 this=exp.var(expressions[0].name), 599 expression=self._parse_conjunction(), 600 ), 601 } 602 603 COLUMN_OPERATORS = { 604 TokenType.DOT: None, 605 TokenType.DCOLON: lambda self, this, to: self.expression( 606 exp.Cast if self.STRICT_CAST else exp.TryCast, 607 this=this, 608 to=to, 609 ), 610 TokenType.ARROW: lambda self, this, path: self.expression( 611 exp.JSONExtract, 612 this=this, 613 expression=self.dialect.to_json_path(path), 614 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 615 ), 616 TokenType.DARROW: lambda self, this, path: self.expression( 617 exp.JSONExtractScalar, 618 this=this, 619 expression=self.dialect.to_json_path(path), 620 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 621 ), 622 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 623 exp.JSONBExtract, 624 this=this, 625 expression=path, 626 ), 627 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 628 exp.JSONBExtractScalar, 629 this=this, 630 expression=path, 631 ), 632 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 633 exp.JSONBContains, 634 this=this, 635 expression=key, 636 ), 637 } 638 639 EXPRESSION_PARSERS = { 640 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 641 exp.Column: lambda self: self._parse_column(), 642 exp.Condition: lambda self: self._parse_conjunction(), 643 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 644 exp.Expression: lambda self: self._parse_expression(), 645 exp.From: lambda self: self._parse_from(joins=True), 646 exp.Group: lambda self: self._parse_group(), 647 exp.Having: lambda self: self._parse_having(), 648 exp.Identifier: lambda self: self._parse_id_var(), 649 exp.Join: lambda self: self._parse_join(), 650 exp.Lambda: lambda self: self._parse_lambda(), 651 exp.Lateral: lambda self: self._parse_lateral(), 652 exp.Limit: lambda self: self._parse_limit(), 653 exp.Offset: lambda self: self._parse_offset(), 654 exp.Order: lambda self: self._parse_order(), 655 exp.Ordered: lambda self: self._parse_ordered(), 656 exp.Properties: lambda self: self._parse_properties(), 657 exp.Qualify: lambda self: self._parse_qualify(), 658 exp.Returning: lambda self: self._parse_returning(), 659 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 660 exp.Table: lambda self: self._parse_table_parts(), 661 exp.TableAlias: lambda self: self._parse_table_alias(), 662 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 663 exp.Where: lambda self: self._parse_where(), 664 exp.Window: lambda self: self._parse_named_window(), 665 exp.With: lambda self: self._parse_with(), 666 "JOIN_TYPE": lambda self: self._parse_join_parts(), 667 } 668 669 STATEMENT_PARSERS = { 670 TokenType.ALTER: lambda self: self._parse_alter(), 671 TokenType.BEGIN: lambda self: self._parse_transaction(), 672 TokenType.CACHE: lambda self: self._parse_cache(), 673 TokenType.COMMENT: lambda self: self._parse_comment(), 674 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 675 TokenType.COPY: lambda self: self._parse_copy(), 676 TokenType.CREATE: lambda self: self._parse_create(), 677 TokenType.DELETE: lambda self: self._parse_delete(), 678 TokenType.DESC: lambda self: self._parse_describe(), 679 TokenType.DESCRIBE: lambda self: self._parse_describe(), 680 TokenType.DROP: lambda self: self._parse_drop(), 681 TokenType.INSERT: lambda self: self._parse_insert(), 682 TokenType.KILL: lambda self: self._parse_kill(), 683 TokenType.LOAD: lambda self: self._parse_load(), 684 TokenType.MERGE: lambda self: self._parse_merge(), 685 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 686 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 687 TokenType.REFRESH: lambda self: self._parse_refresh(), 688 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 689 TokenType.SET: lambda self: self._parse_set(), 690 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 691 TokenType.UNCACHE: lambda self: self._parse_uncache(), 692 TokenType.UPDATE: lambda self: self._parse_update(), 693 TokenType.USE: lambda self: self.expression( 694 exp.Use, 695 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 696 this=self._parse_table(schema=False), 697 ), 698 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 699 } 700 701 UNARY_PARSERS = { 702 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 703 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 704 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 705 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 706 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 707 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 708 } 709 710 STRING_PARSERS = { 711 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 712 exp.RawString, this=token.text 713 ), 714 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 715 exp.National, this=token.text 716 ), 717 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 718 TokenType.STRING: lambda self, token: self.expression( 719 exp.Literal, this=token.text, is_string=True 720 ), 721 TokenType.UNICODE_STRING: lambda self, token: self.expression( 722 exp.UnicodeString, 723 this=token.text, 724 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 725 ), 726 } 727 728 NUMERIC_PARSERS = { 729 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 730 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 731 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 732 TokenType.NUMBER: lambda self, token: self.expression( 733 exp.Literal, this=token.text, is_string=False 734 ), 735 } 736 737 PRIMARY_PARSERS = { 738 **STRING_PARSERS, 739 **NUMERIC_PARSERS, 740 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 741 TokenType.NULL: lambda self, _: self.expression(exp.Null), 742 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 743 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 744 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 745 TokenType.STAR: lambda self, _: self.expression( 746 exp.Star, 747 **{ 748 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 749 "replace": self._parse_star_op("REPLACE"), 750 "rename": self._parse_star_op("RENAME"), 751 }, 752 ), 753 } 754 755 PLACEHOLDER_PARSERS = { 756 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 757 TokenType.PARAMETER: lambda self: self._parse_parameter(), 758 TokenType.COLON: lambda self: ( 759 self.expression(exp.Placeholder, this=self._prev.text) 760 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 761 else None 762 ), 763 } 764 765 RANGE_PARSERS = { 766 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 767 TokenType.GLOB: binary_range_parser(exp.Glob), 768 TokenType.ILIKE: binary_range_parser(exp.ILike), 769 TokenType.IN: lambda self, this: self._parse_in(this), 770 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 771 TokenType.IS: lambda self, this: self._parse_is(this), 772 TokenType.LIKE: binary_range_parser(exp.Like), 773 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 774 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 775 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 776 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 777 } 778 779 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 780 "ALLOWED_VALUES": lambda self: self.expression( 781 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 782 ), 783 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 784 "AUTO": lambda self: self._parse_auto_property(), 785 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 786 "BACKUP": lambda self: self.expression( 787 exp.BackupProperty, this=self._parse_var(any_token=True) 788 ), 789 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 790 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 791 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 792 "CHECKSUM": lambda self: self._parse_checksum(), 793 "CLUSTER BY": lambda self: self._parse_cluster(), 794 "CLUSTERED": lambda self: self._parse_clustered_by(), 795 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 796 exp.CollateProperty, **kwargs 797 ), 798 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 799 "CONTAINS": lambda self: self._parse_contains_property(), 800 "COPY": lambda self: self._parse_copy_property(), 801 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 802 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 803 "DEFINER": lambda self: self._parse_definer(), 804 "DETERMINISTIC": lambda self: self.expression( 805 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 806 ), 807 "DISTKEY": lambda self: self._parse_distkey(), 808 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 809 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 810 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 811 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 812 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 813 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 814 "FREESPACE": lambda self: self._parse_freespace(), 815 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 816 "HEAP": lambda self: self.expression(exp.HeapProperty), 817 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 818 "IMMUTABLE": lambda self: self.expression( 819 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 820 ), 821 "INHERITS": lambda self: self.expression( 822 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 823 ), 824 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 825 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 826 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 827 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 828 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 829 "LIKE": lambda self: self._parse_create_like(), 830 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 831 "LOCK": lambda self: self._parse_locking(), 832 "LOCKING": lambda self: self._parse_locking(), 833 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 834 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 835 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 836 "MODIFIES": lambda self: self._parse_modifies_property(), 837 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 838 "NO": lambda self: self._parse_no_property(), 839 "ON": lambda self: self._parse_on_property(), 840 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 841 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 842 "PARTITION": lambda self: self._parse_partitioned_of(), 843 "PARTITION BY": lambda self: self._parse_partitioned_by(), 844 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 845 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 846 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 847 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 848 "READS": lambda self: self._parse_reads_property(), 849 "REMOTE": lambda self: self._parse_remote_with_connection(), 850 "RETURNS": lambda self: self._parse_returns(), 851 "STRICT": lambda self: self.expression(exp.StrictProperty), 852 "ROW": lambda self: self._parse_row(), 853 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 854 "SAMPLE": lambda self: self.expression( 855 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 856 ), 857 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 858 "SETTINGS": lambda self: self.expression( 859 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 860 ), 861 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 862 "SORTKEY": lambda self: self._parse_sortkey(), 863 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 864 "STABLE": lambda self: self.expression( 865 exp.StabilityProperty, this=exp.Literal.string("STABLE") 866 ), 867 "STORED": lambda self: self._parse_stored(), 868 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 869 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 870 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 871 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 872 "TO": lambda self: self._parse_to_table(), 873 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 874 "TRANSFORM": lambda self: self.expression( 875 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 876 ), 877 "TTL": lambda self: self._parse_ttl(), 878 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 879 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 880 "VOLATILE": lambda self: self._parse_volatile_property(), 881 "WITH": lambda self: self._parse_with_property(), 882 } 883 884 CONSTRAINT_PARSERS = { 885 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 886 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 887 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 888 "CHARACTER SET": lambda self: self.expression( 889 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 890 ), 891 "CHECK": lambda self: self.expression( 892 exp.CheckColumnConstraint, 893 this=self._parse_wrapped(self._parse_conjunction), 894 enforced=self._match_text_seq("ENFORCED"), 895 ), 896 "COLLATE": lambda self: self.expression( 897 exp.CollateColumnConstraint, this=self._parse_var() 898 ), 899 "COMMENT": lambda self: self.expression( 900 exp.CommentColumnConstraint, this=self._parse_string() 901 ), 902 "COMPRESS": lambda self: self._parse_compress(), 903 "CLUSTERED": lambda self: self.expression( 904 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 905 ), 906 "NONCLUSTERED": lambda self: self.expression( 907 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 908 ), 909 "DEFAULT": lambda self: self.expression( 910 exp.DefaultColumnConstraint, this=self._parse_bitwise() 911 ), 912 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 913 "EPHEMERAL": lambda self: self.expression( 914 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 915 ), 916 "EXCLUDE": lambda self: self.expression( 917 exp.ExcludeColumnConstraint, this=self._parse_index_params() 918 ), 919 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 920 "FORMAT": lambda self: self.expression( 921 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 922 ), 923 "GENERATED": lambda self: self._parse_generated_as_identity(), 924 "IDENTITY": lambda self: self._parse_auto_increment(), 925 "INLINE": lambda self: self._parse_inline(), 926 "LIKE": lambda self: self._parse_create_like(), 927 "NOT": lambda self: self._parse_not_constraint(), 928 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 929 "ON": lambda self: ( 930 self._match(TokenType.UPDATE) 931 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 932 ) 933 or self.expression(exp.OnProperty, this=self._parse_id_var()), 934 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 935 "PERIOD": lambda self: self._parse_period_for_system_time(), 936 "PRIMARY KEY": lambda self: self._parse_primary_key(), 937 "REFERENCES": lambda self: self._parse_references(match=False), 938 "TITLE": lambda self: self.expression( 939 exp.TitleColumnConstraint, this=self._parse_var_or_string() 940 ), 941 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 942 "UNIQUE": lambda self: self._parse_unique(), 943 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 944 "WITH": lambda self: self.expression( 945 exp.Properties, expressions=self._parse_wrapped_properties() 946 ), 947 } 948 949 ALTER_PARSERS = { 950 "ADD": lambda self: self._parse_alter_table_add(), 951 "ALTER": lambda self: self._parse_alter_table_alter(), 952 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 953 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 954 "DROP": lambda self: self._parse_alter_table_drop(), 955 "RENAME": lambda self: self._parse_alter_table_rename(), 956 "SET": lambda self: self._parse_alter_table_set(), 957 } 958 959 ALTER_ALTER_PARSERS = { 960 "DISTKEY": lambda self: self._parse_alter_diststyle(), 961 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 962 "SORTKEY": lambda self: self._parse_alter_sortkey(), 963 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 964 } 965 966 SCHEMA_UNNAMED_CONSTRAINTS = { 967 "CHECK", 968 "EXCLUDE", 969 "FOREIGN KEY", 970 "LIKE", 971 "PERIOD", 972 "PRIMARY KEY", 973 "UNIQUE", 974 } 975 976 NO_PAREN_FUNCTION_PARSERS = { 977 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 978 "CASE": lambda self: self._parse_case(), 979 "IF": lambda self: self._parse_if(), 980 "NEXT": lambda self: self._parse_next_value_for(), 981 } 982 983 INVALID_FUNC_NAME_TOKENS = { 984 TokenType.IDENTIFIER, 985 TokenType.STRING, 986 } 987 988 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 989 990 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 991 992 FUNCTION_PARSERS = { 993 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 994 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 995 "DECODE": lambda self: self._parse_decode(), 996 "EXTRACT": lambda self: self._parse_extract(), 997 "JSON_OBJECT": lambda self: self._parse_json_object(), 998 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 999 "JSON_TABLE": lambda self: self._parse_json_table(), 1000 "MATCH": lambda self: self._parse_match_against(), 1001 "OPENJSON": lambda self: self._parse_open_json(), 1002 "POSITION": lambda self: self._parse_position(), 1003 "PREDICT": lambda self: self._parse_predict(), 1004 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1005 "STRING_AGG": lambda self: self._parse_string_agg(), 1006 "SUBSTRING": lambda self: self._parse_substring(), 1007 "TRIM": lambda self: self._parse_trim(), 1008 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1009 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1010 } 1011 1012 QUERY_MODIFIER_PARSERS = { 1013 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1014 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1015 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1016 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1017 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1018 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1019 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1020 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1021 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1022 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1023 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1024 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1025 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1026 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1027 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1028 TokenType.CLUSTER_BY: lambda self: ( 1029 "cluster", 1030 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1031 ), 1032 TokenType.DISTRIBUTE_BY: lambda self: ( 1033 "distribute", 1034 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1035 ), 1036 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1037 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1038 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1039 } 1040 1041 SET_PARSERS = { 1042 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1043 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1044 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1045 "TRANSACTION": lambda self: self._parse_set_transaction(), 1046 } 1047 1048 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1049 1050 TYPE_LITERAL_PARSERS = { 1051 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1052 } 1053 1054 TYPE_CONVERTER: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1055 1056 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1057 1058 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1059 1060 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1061 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1062 "ISOLATION": ( 1063 ("LEVEL", "REPEATABLE", "READ"), 1064 ("LEVEL", "READ", "COMMITTED"), 1065 ("LEVEL", "READ", "UNCOMITTED"), 1066 ("LEVEL", "SERIALIZABLE"), 1067 ), 1068 "READ": ("WRITE", "ONLY"), 1069 } 1070 1071 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1072 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1073 ) 1074 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1075 1076 CREATE_SEQUENCE: OPTIONS_TYPE = { 1077 "SCALE": ("EXTEND", "NOEXTEND"), 1078 "SHARD": ("EXTEND", "NOEXTEND"), 1079 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1080 **dict.fromkeys( 1081 ( 1082 "SESSION", 1083 "GLOBAL", 1084 "KEEP", 1085 "NOKEEP", 1086 "ORDER", 1087 "NOORDER", 1088 "NOCACHE", 1089 "CYCLE", 1090 "NOCYCLE", 1091 "NOMINVALUE", 1092 "NOMAXVALUE", 1093 "NOSCALE", 1094 "NOSHARD", 1095 ), 1096 tuple(), 1097 ), 1098 } 1099 1100 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1101 1102 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1103 1104 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1105 1106 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1107 1108 CLONE_KEYWORDS = {"CLONE", "COPY"} 1109 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1110 1111 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1112 1113 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1114 1115 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1116 1117 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1118 1119 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1120 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1121 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1122 1123 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1124 1125 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1126 1127 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 1128 1129 DISTINCT_TOKENS = {TokenType.DISTINCT} 1130 1131 NULL_TOKENS = {TokenType.NULL} 1132 1133 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1134 1135 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1136 1137 STRICT_CAST = True 1138 1139 PREFIXED_PIVOT_COLUMNS = False 1140 IDENTIFY_PIVOT_STRINGS = False 1141 1142 LOG_DEFAULTS_TO_LN = False 1143 1144 # Whether ADD is present for each column added by ALTER TABLE 1145 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1146 1147 # Whether the table sample clause expects CSV syntax 1148 TABLESAMPLE_CSV = False 1149 1150 # The default method used for table sampling 1151 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1152 1153 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1154 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1155 1156 # Whether the TRIM function expects the characters to trim as its first argument 1157 TRIM_PATTERN_FIRST = False 1158 1159 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1160 STRING_ALIASES = False 1161 1162 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1163 MODIFIERS_ATTACHED_TO_UNION = True 1164 UNION_MODIFIERS = {"order", "limit", "offset"} 1165 1166 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1167 NO_PAREN_IF_COMMANDS = True 1168 1169 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1170 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1171 1172 # Whether the `:` operator is used to extract a value from a JSON document 1173 COLON_IS_JSON_EXTRACT = False 1174 1175 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1176 # If this is True and '(' is not found, the keyword will be treated as an identifier 1177 VALUES_FOLLOWED_BY_PAREN = True 1178 1179 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1180 SUPPORTS_IMPLICIT_UNNEST = False 1181 1182 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1183 INTERVAL_SPANS = True 1184 1185 # Whether a PARTITION clause can follow a table reference 1186 SUPPORTS_PARTITION_SELECTION = False 1187 1188 __slots__ = ( 1189 "error_level", 1190 "error_message_context", 1191 "max_errors", 1192 "dialect", 1193 "sql", 1194 "errors", 1195 "_tokens", 1196 "_index", 1197 "_curr", 1198 "_next", 1199 "_prev", 1200 "_prev_comments", 1201 ) 1202 1203 # Autofilled 1204 SHOW_TRIE: t.Dict = {} 1205 SET_TRIE: t.Dict = {} 1206 1207 def __init__( 1208 self, 1209 error_level: t.Optional[ErrorLevel] = None, 1210 error_message_context: int = 100, 1211 max_errors: int = 3, 1212 dialect: DialectType = None, 1213 ): 1214 from sqlglot.dialects import Dialect 1215 1216 self.error_level = error_level or ErrorLevel.IMMEDIATE 1217 self.error_message_context = error_message_context 1218 self.max_errors = max_errors 1219 self.dialect = Dialect.get_or_raise(dialect) 1220 self.reset() 1221 1222 def reset(self): 1223 self.sql = "" 1224 self.errors = [] 1225 self._tokens = [] 1226 self._index = 0 1227 self._curr = None 1228 self._next = None 1229 self._prev = None 1230 self._prev_comments = None 1231 1232 def parse( 1233 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1234 ) -> t.List[t.Optional[exp.Expression]]: 1235 """ 1236 Parses a list of tokens and returns a list of syntax trees, one tree 1237 per parsed SQL statement. 1238 1239 Args: 1240 raw_tokens: The list of tokens. 1241 sql: The original SQL string, used to produce helpful debug messages. 1242 1243 Returns: 1244 The list of the produced syntax trees. 1245 """ 1246 return self._parse( 1247 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1248 ) 1249 1250 def parse_into( 1251 self, 1252 expression_types: exp.IntoType, 1253 raw_tokens: t.List[Token], 1254 sql: t.Optional[str] = None, 1255 ) -> t.List[t.Optional[exp.Expression]]: 1256 """ 1257 Parses a list of tokens into a given Expression type. If a collection of Expression 1258 types is given instead, this method will try to parse the token list into each one 1259 of them, stopping at the first for which the parsing succeeds. 1260 1261 Args: 1262 expression_types: The expression type(s) to try and parse the token list into. 1263 raw_tokens: The list of tokens. 1264 sql: The original SQL string, used to produce helpful debug messages. 1265 1266 Returns: 1267 The target Expression. 1268 """ 1269 errors = [] 1270 for expression_type in ensure_list(expression_types): 1271 parser = self.EXPRESSION_PARSERS.get(expression_type) 1272 if not parser: 1273 raise TypeError(f"No parser registered for {expression_type}") 1274 1275 try: 1276 return self._parse(parser, raw_tokens, sql) 1277 except ParseError as e: 1278 e.errors[0]["into_expression"] = expression_type 1279 errors.append(e) 1280 1281 raise ParseError( 1282 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1283 errors=merge_errors(errors), 1284 ) from errors[-1] 1285 1286 def _parse( 1287 self, 1288 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1289 raw_tokens: t.List[Token], 1290 sql: t.Optional[str] = None, 1291 ) -> t.List[t.Optional[exp.Expression]]: 1292 self.reset() 1293 self.sql = sql or "" 1294 1295 total = len(raw_tokens) 1296 chunks: t.List[t.List[Token]] = [[]] 1297 1298 for i, token in enumerate(raw_tokens): 1299 if token.token_type == TokenType.SEMICOLON: 1300 if token.comments: 1301 chunks.append([token]) 1302 1303 if i < total - 1: 1304 chunks.append([]) 1305 else: 1306 chunks[-1].append(token) 1307 1308 expressions = [] 1309 1310 for tokens in chunks: 1311 self._index = -1 1312 self._tokens = tokens 1313 self._advance() 1314 1315 expressions.append(parse_method(self)) 1316 1317 if self._index < len(self._tokens): 1318 self.raise_error("Invalid expression / Unexpected token") 1319 1320 self.check_errors() 1321 1322 return expressions 1323 1324 def check_errors(self) -> None: 1325 """Logs or raises any found errors, depending on the chosen error level setting.""" 1326 if self.error_level == ErrorLevel.WARN: 1327 for error in self.errors: 1328 logger.error(str(error)) 1329 elif self.error_level == ErrorLevel.RAISE and self.errors: 1330 raise ParseError( 1331 concat_messages(self.errors, self.max_errors), 1332 errors=merge_errors(self.errors), 1333 ) 1334 1335 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1336 """ 1337 Appends an error in the list of recorded errors or raises it, depending on the chosen 1338 error level setting. 1339 """ 1340 token = token or self._curr or self._prev or Token.string("") 1341 start = token.start 1342 end = token.end + 1 1343 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1344 highlight = self.sql[start:end] 1345 end_context = self.sql[end : end + self.error_message_context] 1346 1347 error = ParseError.new( 1348 f"{message}. Line {token.line}, Col: {token.col}.\n" 1349 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1350 description=message, 1351 line=token.line, 1352 col=token.col, 1353 start_context=start_context, 1354 highlight=highlight, 1355 end_context=end_context, 1356 ) 1357 1358 if self.error_level == ErrorLevel.IMMEDIATE: 1359 raise error 1360 1361 self.errors.append(error) 1362 1363 def expression( 1364 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1365 ) -> E: 1366 """ 1367 Creates a new, validated Expression. 1368 1369 Args: 1370 exp_class: The expression class to instantiate. 1371 comments: An optional list of comments to attach to the expression. 1372 kwargs: The arguments to set for the expression along with their respective values. 1373 1374 Returns: 1375 The target expression. 1376 """ 1377 instance = exp_class(**kwargs) 1378 instance.add_comments(comments) if comments else self._add_comments(instance) 1379 return self.validate_expression(instance) 1380 1381 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1382 if expression and self._prev_comments: 1383 expression.add_comments(self._prev_comments) 1384 self._prev_comments = None 1385 1386 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1387 """ 1388 Validates an Expression, making sure that all its mandatory arguments are set. 1389 1390 Args: 1391 expression: The expression to validate. 1392 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1393 1394 Returns: 1395 The validated expression. 1396 """ 1397 if self.error_level != ErrorLevel.IGNORE: 1398 for error_message in expression.error_messages(args): 1399 self.raise_error(error_message) 1400 1401 return expression 1402 1403 def _find_sql(self, start: Token, end: Token) -> str: 1404 return self.sql[start.start : end.end + 1] 1405 1406 def _is_connected(self) -> bool: 1407 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1408 1409 def _advance(self, times: int = 1) -> None: 1410 self._index += times 1411 self._curr = seq_get(self._tokens, self._index) 1412 self._next = seq_get(self._tokens, self._index + 1) 1413 1414 if self._index > 0: 1415 self._prev = self._tokens[self._index - 1] 1416 self._prev_comments = self._prev.comments 1417 else: 1418 self._prev = None 1419 self._prev_comments = None 1420 1421 def _retreat(self, index: int) -> None: 1422 if index != self._index: 1423 self._advance(index - self._index) 1424 1425 def _warn_unsupported(self) -> None: 1426 if len(self._tokens) <= 1: 1427 return 1428 1429 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1430 # interested in emitting a warning for the one being currently processed. 1431 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1432 1433 logger.warning( 1434 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1435 ) 1436 1437 def _parse_command(self) -> exp.Command: 1438 self._warn_unsupported() 1439 return self.expression( 1440 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1441 ) 1442 1443 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1444 """ 1445 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. This behavior can 1446 be different depending on the uset-set ErrorLevel, so _try_parse aims to solve this by setting & resetting 1447 the parser state accordingly 1448 """ 1449 index = self._index 1450 error_level = self.error_level 1451 1452 self.error_level = ErrorLevel.IMMEDIATE 1453 try: 1454 this = parse_method() 1455 except ParseError: 1456 this = None 1457 finally: 1458 if not this or retreat: 1459 self._retreat(index) 1460 self.error_level = error_level 1461 1462 return this 1463 1464 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1465 start = self._prev 1466 exists = self._parse_exists() if allow_exists else None 1467 1468 self._match(TokenType.ON) 1469 1470 materialized = self._match_text_seq("MATERIALIZED") 1471 kind = self._match_set(self.CREATABLES) and self._prev 1472 if not kind: 1473 return self._parse_as_command(start) 1474 1475 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1476 this = self._parse_user_defined_function(kind=kind.token_type) 1477 elif kind.token_type == TokenType.TABLE: 1478 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1479 elif kind.token_type == TokenType.COLUMN: 1480 this = self._parse_column() 1481 else: 1482 this = self._parse_id_var() 1483 1484 self._match(TokenType.IS) 1485 1486 return self.expression( 1487 exp.Comment, 1488 this=this, 1489 kind=kind.text, 1490 expression=self._parse_string(), 1491 exists=exists, 1492 materialized=materialized, 1493 ) 1494 1495 def _parse_to_table( 1496 self, 1497 ) -> exp.ToTableProperty: 1498 table = self._parse_table_parts(schema=True) 1499 return self.expression(exp.ToTableProperty, this=table) 1500 1501 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1502 def _parse_ttl(self) -> exp.Expression: 1503 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1504 this = self._parse_bitwise() 1505 1506 if self._match_text_seq("DELETE"): 1507 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1508 if self._match_text_seq("RECOMPRESS"): 1509 return self.expression( 1510 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1511 ) 1512 if self._match_text_seq("TO", "DISK"): 1513 return self.expression( 1514 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1515 ) 1516 if self._match_text_seq("TO", "VOLUME"): 1517 return self.expression( 1518 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1519 ) 1520 1521 return this 1522 1523 expressions = self._parse_csv(_parse_ttl_action) 1524 where = self._parse_where() 1525 group = self._parse_group() 1526 1527 aggregates = None 1528 if group and self._match(TokenType.SET): 1529 aggregates = self._parse_csv(self._parse_set_item) 1530 1531 return self.expression( 1532 exp.MergeTreeTTL, 1533 expressions=expressions, 1534 where=where, 1535 group=group, 1536 aggregates=aggregates, 1537 ) 1538 1539 def _parse_statement(self) -> t.Optional[exp.Expression]: 1540 if self._curr is None: 1541 return None 1542 1543 if self._match_set(self.STATEMENT_PARSERS): 1544 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1545 1546 if self._match_set(self.dialect.tokenizer.COMMANDS): 1547 return self._parse_command() 1548 1549 expression = self._parse_expression() 1550 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1551 return self._parse_query_modifiers(expression) 1552 1553 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1554 start = self._prev 1555 temporary = self._match(TokenType.TEMPORARY) 1556 materialized = self._match_text_seq("MATERIALIZED") 1557 1558 kind = self._match_set(self.CREATABLES) and self._prev.text 1559 if not kind: 1560 return self._parse_as_command(start) 1561 1562 if_exists = exists or self._parse_exists() 1563 table = self._parse_table_parts( 1564 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1565 ) 1566 1567 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1568 1569 if self._match(TokenType.L_PAREN, advance=False): 1570 expressions = self._parse_wrapped_csv(self._parse_types) 1571 else: 1572 expressions = None 1573 1574 return self.expression( 1575 exp.Drop, 1576 comments=start.comments, 1577 exists=if_exists, 1578 this=table, 1579 expressions=expressions, 1580 kind=kind.upper(), 1581 temporary=temporary, 1582 materialized=materialized, 1583 cascade=self._match_text_seq("CASCADE"), 1584 constraints=self._match_text_seq("CONSTRAINTS"), 1585 purge=self._match_text_seq("PURGE"), 1586 cluster=cluster, 1587 ) 1588 1589 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1590 return ( 1591 self._match_text_seq("IF") 1592 and (not not_ or self._match(TokenType.NOT)) 1593 and self._match(TokenType.EXISTS) 1594 ) 1595 1596 def _parse_create(self) -> exp.Create | exp.Command: 1597 # Note: this can't be None because we've matched a statement parser 1598 start = self._prev 1599 comments = self._prev_comments 1600 1601 replace = ( 1602 start.token_type == TokenType.REPLACE 1603 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1604 or self._match_pair(TokenType.OR, TokenType.ALTER) 1605 ) 1606 1607 unique = self._match(TokenType.UNIQUE) 1608 1609 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1610 self._advance() 1611 1612 properties = None 1613 create_token = self._match_set(self.CREATABLES) and self._prev 1614 1615 if not create_token: 1616 # exp.Properties.Location.POST_CREATE 1617 properties = self._parse_properties() 1618 create_token = self._match_set(self.CREATABLES) and self._prev 1619 1620 if not properties or not create_token: 1621 return self._parse_as_command(start) 1622 1623 exists = self._parse_exists(not_=True) 1624 this = None 1625 expression: t.Optional[exp.Expression] = None 1626 indexes = None 1627 no_schema_binding = None 1628 begin = None 1629 end = None 1630 clone = None 1631 1632 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1633 nonlocal properties 1634 if properties and temp_props: 1635 properties.expressions.extend(temp_props.expressions) 1636 elif temp_props: 1637 properties = temp_props 1638 1639 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1640 this = self._parse_user_defined_function(kind=create_token.token_type) 1641 1642 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1643 extend_props(self._parse_properties()) 1644 1645 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1646 extend_props(self._parse_properties()) 1647 1648 if not expression: 1649 if self._match(TokenType.COMMAND): 1650 expression = self._parse_as_command(self._prev) 1651 else: 1652 begin = self._match(TokenType.BEGIN) 1653 return_ = self._match_text_seq("RETURN") 1654 1655 if self._match(TokenType.STRING, advance=False): 1656 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1657 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1658 expression = self._parse_string() 1659 extend_props(self._parse_properties()) 1660 else: 1661 expression = self._parse_statement() 1662 1663 end = self._match_text_seq("END") 1664 1665 if return_: 1666 expression = self.expression(exp.Return, this=expression) 1667 elif create_token.token_type == TokenType.INDEX: 1668 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1669 if not self._match(TokenType.ON): 1670 index = self._parse_id_var() 1671 anonymous = False 1672 else: 1673 index = None 1674 anonymous = True 1675 1676 this = self._parse_index(index=index, anonymous=anonymous) 1677 elif create_token.token_type in self.DB_CREATABLES: 1678 table_parts = self._parse_table_parts( 1679 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1680 ) 1681 1682 # exp.Properties.Location.POST_NAME 1683 self._match(TokenType.COMMA) 1684 extend_props(self._parse_properties(before=True)) 1685 1686 this = self._parse_schema(this=table_parts) 1687 1688 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1689 extend_props(self._parse_properties()) 1690 1691 self._match(TokenType.ALIAS) 1692 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1693 # exp.Properties.Location.POST_ALIAS 1694 extend_props(self._parse_properties()) 1695 1696 if create_token.token_type == TokenType.SEQUENCE: 1697 expression = self._parse_types() 1698 extend_props(self._parse_properties()) 1699 else: 1700 expression = self._parse_ddl_select() 1701 1702 if create_token.token_type == TokenType.TABLE: 1703 # exp.Properties.Location.POST_EXPRESSION 1704 extend_props(self._parse_properties()) 1705 1706 indexes = [] 1707 while True: 1708 index = self._parse_index() 1709 1710 # exp.Properties.Location.POST_INDEX 1711 extend_props(self._parse_properties()) 1712 1713 if not index: 1714 break 1715 else: 1716 self._match(TokenType.COMMA) 1717 indexes.append(index) 1718 elif create_token.token_type == TokenType.VIEW: 1719 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1720 no_schema_binding = True 1721 1722 shallow = self._match_text_seq("SHALLOW") 1723 1724 if self._match_texts(self.CLONE_KEYWORDS): 1725 copy = self._prev.text.lower() == "copy" 1726 clone = self.expression( 1727 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1728 ) 1729 1730 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1731 return self._parse_as_command(start) 1732 1733 return self.expression( 1734 exp.Create, 1735 comments=comments, 1736 this=this, 1737 kind=create_token.text.upper(), 1738 replace=replace, 1739 unique=unique, 1740 expression=expression, 1741 exists=exists, 1742 properties=properties, 1743 indexes=indexes, 1744 no_schema_binding=no_schema_binding, 1745 begin=begin, 1746 end=end, 1747 clone=clone, 1748 ) 1749 1750 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1751 seq = exp.SequenceProperties() 1752 1753 options = [] 1754 index = self._index 1755 1756 while self._curr: 1757 self._match(TokenType.COMMA) 1758 if self._match_text_seq("INCREMENT"): 1759 self._match_text_seq("BY") 1760 self._match_text_seq("=") 1761 seq.set("increment", self._parse_term()) 1762 elif self._match_text_seq("MINVALUE"): 1763 seq.set("minvalue", self._parse_term()) 1764 elif self._match_text_seq("MAXVALUE"): 1765 seq.set("maxvalue", self._parse_term()) 1766 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1767 self._match_text_seq("=") 1768 seq.set("start", self._parse_term()) 1769 elif self._match_text_seq("CACHE"): 1770 # T-SQL allows empty CACHE which is initialized dynamically 1771 seq.set("cache", self._parse_number() or True) 1772 elif self._match_text_seq("OWNED", "BY"): 1773 # "OWNED BY NONE" is the default 1774 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1775 else: 1776 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1777 if opt: 1778 options.append(opt) 1779 else: 1780 break 1781 1782 seq.set("options", options if options else None) 1783 return None if self._index == index else seq 1784 1785 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1786 # only used for teradata currently 1787 self._match(TokenType.COMMA) 1788 1789 kwargs = { 1790 "no": self._match_text_seq("NO"), 1791 "dual": self._match_text_seq("DUAL"), 1792 "before": self._match_text_seq("BEFORE"), 1793 "default": self._match_text_seq("DEFAULT"), 1794 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1795 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1796 "after": self._match_text_seq("AFTER"), 1797 "minimum": self._match_texts(("MIN", "MINIMUM")), 1798 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1799 } 1800 1801 if self._match_texts(self.PROPERTY_PARSERS): 1802 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1803 try: 1804 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1805 except TypeError: 1806 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1807 1808 return None 1809 1810 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1811 return self._parse_wrapped_csv(self._parse_property) 1812 1813 def _parse_property(self) -> t.Optional[exp.Expression]: 1814 if self._match_texts(self.PROPERTY_PARSERS): 1815 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1816 1817 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1818 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1819 1820 if self._match_text_seq("COMPOUND", "SORTKEY"): 1821 return self._parse_sortkey(compound=True) 1822 1823 if self._match_text_seq("SQL", "SECURITY"): 1824 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1825 1826 index = self._index 1827 key = self._parse_column() 1828 1829 if not self._match(TokenType.EQ): 1830 self._retreat(index) 1831 return self._parse_sequence_properties() 1832 1833 return self.expression( 1834 exp.Property, 1835 this=key.to_dot() if isinstance(key, exp.Column) else key, 1836 value=self._parse_bitwise() or self._parse_var(any_token=True), 1837 ) 1838 1839 def _parse_stored(self) -> exp.FileFormatProperty: 1840 self._match(TokenType.ALIAS) 1841 1842 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1843 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1844 1845 return self.expression( 1846 exp.FileFormatProperty, 1847 this=( 1848 self.expression( 1849 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1850 ) 1851 if input_format or output_format 1852 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1853 ), 1854 ) 1855 1856 def _parse_unquoted_field(self): 1857 field = self._parse_field() 1858 if isinstance(field, exp.Identifier) and not field.quoted: 1859 field = exp.var(field) 1860 1861 return field 1862 1863 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1864 self._match(TokenType.EQ) 1865 self._match(TokenType.ALIAS) 1866 1867 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1868 1869 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1870 properties = [] 1871 while True: 1872 if before: 1873 prop = self._parse_property_before() 1874 else: 1875 prop = self._parse_property() 1876 if not prop: 1877 break 1878 for p in ensure_list(prop): 1879 properties.append(p) 1880 1881 if properties: 1882 return self.expression(exp.Properties, expressions=properties) 1883 1884 return None 1885 1886 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1887 return self.expression( 1888 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1889 ) 1890 1891 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1892 if self._index >= 2: 1893 pre_volatile_token = self._tokens[self._index - 2] 1894 else: 1895 pre_volatile_token = None 1896 1897 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1898 return exp.VolatileProperty() 1899 1900 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1901 1902 def _parse_retention_period(self) -> exp.Var: 1903 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 1904 number = self._parse_number() 1905 number_str = f"{number} " if number else "" 1906 unit = self._parse_var(any_token=True) 1907 return exp.var(f"{number_str}{unit}") 1908 1909 def _parse_system_versioning_property( 1910 self, with_: bool = False 1911 ) -> exp.WithSystemVersioningProperty: 1912 self._match(TokenType.EQ) 1913 prop = self.expression( 1914 exp.WithSystemVersioningProperty, 1915 **{ # type: ignore 1916 "on": True, 1917 "with": with_, 1918 }, 1919 ) 1920 1921 if self._match_text_seq("OFF"): 1922 prop.set("on", False) 1923 return prop 1924 1925 self._match(TokenType.ON) 1926 if self._match(TokenType.L_PAREN): 1927 while self._curr and not self._match(TokenType.R_PAREN): 1928 if self._match_text_seq("HISTORY_TABLE", "="): 1929 prop.set("this", self._parse_table_parts()) 1930 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 1931 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 1932 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 1933 prop.set("retention_period", self._parse_retention_period()) 1934 1935 self._match(TokenType.COMMA) 1936 1937 return prop 1938 1939 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 1940 self._match(TokenType.EQ) 1941 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 1942 prop = self.expression(exp.DataDeletionProperty, on=on) 1943 1944 if self._match(TokenType.L_PAREN): 1945 while self._curr and not self._match(TokenType.R_PAREN): 1946 if self._match_text_seq("FILTER_COLUMN", "="): 1947 prop.set("filter_column", self._parse_column()) 1948 elif self._match_text_seq("RETENTION_PERIOD", "="): 1949 prop.set("retention_period", self._parse_retention_period()) 1950 1951 self._match(TokenType.COMMA) 1952 1953 return prop 1954 1955 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1956 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 1957 prop = self._parse_system_versioning_property(with_=True) 1958 self._match_r_paren() 1959 return prop 1960 1961 if self._match(TokenType.L_PAREN, advance=False): 1962 return self._parse_wrapped_properties() 1963 1964 if self._match_text_seq("JOURNAL"): 1965 return self._parse_withjournaltable() 1966 1967 if self._match_texts(self.VIEW_ATTRIBUTES): 1968 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1969 1970 if self._match_text_seq("DATA"): 1971 return self._parse_withdata(no=False) 1972 elif self._match_text_seq("NO", "DATA"): 1973 return self._parse_withdata(no=True) 1974 1975 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 1976 return self._parse_serde_properties(with_=True) 1977 1978 if not self._next: 1979 return None 1980 1981 return self._parse_withisolatedloading() 1982 1983 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1984 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1985 self._match(TokenType.EQ) 1986 1987 user = self._parse_id_var() 1988 self._match(TokenType.PARAMETER) 1989 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1990 1991 if not user or not host: 1992 return None 1993 1994 return exp.DefinerProperty(this=f"{user}@{host}") 1995 1996 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1997 self._match(TokenType.TABLE) 1998 self._match(TokenType.EQ) 1999 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2000 2001 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2002 return self.expression(exp.LogProperty, no=no) 2003 2004 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2005 return self.expression(exp.JournalProperty, **kwargs) 2006 2007 def _parse_checksum(self) -> exp.ChecksumProperty: 2008 self._match(TokenType.EQ) 2009 2010 on = None 2011 if self._match(TokenType.ON): 2012 on = True 2013 elif self._match_text_seq("OFF"): 2014 on = False 2015 2016 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2017 2018 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2019 return self.expression( 2020 exp.Cluster, 2021 expressions=( 2022 self._parse_wrapped_csv(self._parse_ordered) 2023 if wrapped 2024 else self._parse_csv(self._parse_ordered) 2025 ), 2026 ) 2027 2028 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2029 self._match_text_seq("BY") 2030 2031 self._match_l_paren() 2032 expressions = self._parse_csv(self._parse_column) 2033 self._match_r_paren() 2034 2035 if self._match_text_seq("SORTED", "BY"): 2036 self._match_l_paren() 2037 sorted_by = self._parse_csv(self._parse_ordered) 2038 self._match_r_paren() 2039 else: 2040 sorted_by = None 2041 2042 self._match(TokenType.INTO) 2043 buckets = self._parse_number() 2044 self._match_text_seq("BUCKETS") 2045 2046 return self.expression( 2047 exp.ClusteredByProperty, 2048 expressions=expressions, 2049 sorted_by=sorted_by, 2050 buckets=buckets, 2051 ) 2052 2053 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2054 if not self._match_text_seq("GRANTS"): 2055 self._retreat(self._index - 1) 2056 return None 2057 2058 return self.expression(exp.CopyGrantsProperty) 2059 2060 def _parse_freespace(self) -> exp.FreespaceProperty: 2061 self._match(TokenType.EQ) 2062 return self.expression( 2063 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2064 ) 2065 2066 def _parse_mergeblockratio( 2067 self, no: bool = False, default: bool = False 2068 ) -> exp.MergeBlockRatioProperty: 2069 if self._match(TokenType.EQ): 2070 return self.expression( 2071 exp.MergeBlockRatioProperty, 2072 this=self._parse_number(), 2073 percent=self._match(TokenType.PERCENT), 2074 ) 2075 2076 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2077 2078 def _parse_datablocksize( 2079 self, 2080 default: t.Optional[bool] = None, 2081 minimum: t.Optional[bool] = None, 2082 maximum: t.Optional[bool] = None, 2083 ) -> exp.DataBlocksizeProperty: 2084 self._match(TokenType.EQ) 2085 size = self._parse_number() 2086 2087 units = None 2088 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2089 units = self._prev.text 2090 2091 return self.expression( 2092 exp.DataBlocksizeProperty, 2093 size=size, 2094 units=units, 2095 default=default, 2096 minimum=minimum, 2097 maximum=maximum, 2098 ) 2099 2100 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2101 self._match(TokenType.EQ) 2102 always = self._match_text_seq("ALWAYS") 2103 manual = self._match_text_seq("MANUAL") 2104 never = self._match_text_seq("NEVER") 2105 default = self._match_text_seq("DEFAULT") 2106 2107 autotemp = None 2108 if self._match_text_seq("AUTOTEMP"): 2109 autotemp = self._parse_schema() 2110 2111 return self.expression( 2112 exp.BlockCompressionProperty, 2113 always=always, 2114 manual=manual, 2115 never=never, 2116 default=default, 2117 autotemp=autotemp, 2118 ) 2119 2120 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2121 index = self._index 2122 no = self._match_text_seq("NO") 2123 concurrent = self._match_text_seq("CONCURRENT") 2124 2125 if not self._match_text_seq("ISOLATED", "LOADING"): 2126 self._retreat(index) 2127 return None 2128 2129 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2130 return self.expression( 2131 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2132 ) 2133 2134 def _parse_locking(self) -> exp.LockingProperty: 2135 if self._match(TokenType.TABLE): 2136 kind = "TABLE" 2137 elif self._match(TokenType.VIEW): 2138 kind = "VIEW" 2139 elif self._match(TokenType.ROW): 2140 kind = "ROW" 2141 elif self._match_text_seq("DATABASE"): 2142 kind = "DATABASE" 2143 else: 2144 kind = None 2145 2146 if kind in ("DATABASE", "TABLE", "VIEW"): 2147 this = self._parse_table_parts() 2148 else: 2149 this = None 2150 2151 if self._match(TokenType.FOR): 2152 for_or_in = "FOR" 2153 elif self._match(TokenType.IN): 2154 for_or_in = "IN" 2155 else: 2156 for_or_in = None 2157 2158 if self._match_text_seq("ACCESS"): 2159 lock_type = "ACCESS" 2160 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2161 lock_type = "EXCLUSIVE" 2162 elif self._match_text_seq("SHARE"): 2163 lock_type = "SHARE" 2164 elif self._match_text_seq("READ"): 2165 lock_type = "READ" 2166 elif self._match_text_seq("WRITE"): 2167 lock_type = "WRITE" 2168 elif self._match_text_seq("CHECKSUM"): 2169 lock_type = "CHECKSUM" 2170 else: 2171 lock_type = None 2172 2173 override = self._match_text_seq("OVERRIDE") 2174 2175 return self.expression( 2176 exp.LockingProperty, 2177 this=this, 2178 kind=kind, 2179 for_or_in=for_or_in, 2180 lock_type=lock_type, 2181 override=override, 2182 ) 2183 2184 def _parse_partition_by(self) -> t.List[exp.Expression]: 2185 if self._match(TokenType.PARTITION_BY): 2186 return self._parse_csv(self._parse_conjunction) 2187 return [] 2188 2189 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2190 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2191 if self._match_text_seq("MINVALUE"): 2192 return exp.var("MINVALUE") 2193 if self._match_text_seq("MAXVALUE"): 2194 return exp.var("MAXVALUE") 2195 return self._parse_bitwise() 2196 2197 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2198 expression = None 2199 from_expressions = None 2200 to_expressions = None 2201 2202 if self._match(TokenType.IN): 2203 this = self._parse_wrapped_csv(self._parse_bitwise) 2204 elif self._match(TokenType.FROM): 2205 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2206 self._match_text_seq("TO") 2207 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2208 elif self._match_text_seq("WITH", "(", "MODULUS"): 2209 this = self._parse_number() 2210 self._match_text_seq(",", "REMAINDER") 2211 expression = self._parse_number() 2212 self._match_r_paren() 2213 else: 2214 self.raise_error("Failed to parse partition bound spec.") 2215 2216 return self.expression( 2217 exp.PartitionBoundSpec, 2218 this=this, 2219 expression=expression, 2220 from_expressions=from_expressions, 2221 to_expressions=to_expressions, 2222 ) 2223 2224 # https://www.postgresql.org/docs/current/sql-createtable.html 2225 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2226 if not self._match_text_seq("OF"): 2227 self._retreat(self._index - 1) 2228 return None 2229 2230 this = self._parse_table(schema=True) 2231 2232 if self._match(TokenType.DEFAULT): 2233 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2234 elif self._match_text_seq("FOR", "VALUES"): 2235 expression = self._parse_partition_bound_spec() 2236 else: 2237 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2238 2239 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2240 2241 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2242 self._match(TokenType.EQ) 2243 return self.expression( 2244 exp.PartitionedByProperty, 2245 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2246 ) 2247 2248 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2249 if self._match_text_seq("AND", "STATISTICS"): 2250 statistics = True 2251 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2252 statistics = False 2253 else: 2254 statistics = None 2255 2256 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2257 2258 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2259 if self._match_text_seq("SQL"): 2260 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2261 return None 2262 2263 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2264 if self._match_text_seq("SQL", "DATA"): 2265 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2266 return None 2267 2268 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2269 if self._match_text_seq("PRIMARY", "INDEX"): 2270 return exp.NoPrimaryIndexProperty() 2271 if self._match_text_seq("SQL"): 2272 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2273 return None 2274 2275 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2276 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2277 return exp.OnCommitProperty() 2278 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2279 return exp.OnCommitProperty(delete=True) 2280 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2281 2282 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2283 if self._match_text_seq("SQL", "DATA"): 2284 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2285 return None 2286 2287 def _parse_distkey(self) -> exp.DistKeyProperty: 2288 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2289 2290 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2291 table = self._parse_table(schema=True) 2292 2293 options = [] 2294 while self._match_texts(("INCLUDING", "EXCLUDING")): 2295 this = self._prev.text.upper() 2296 2297 id_var = self._parse_id_var() 2298 if not id_var: 2299 return None 2300 2301 options.append( 2302 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2303 ) 2304 2305 return self.expression(exp.LikeProperty, this=table, expressions=options) 2306 2307 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2308 return self.expression( 2309 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2310 ) 2311 2312 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2313 self._match(TokenType.EQ) 2314 return self.expression( 2315 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2316 ) 2317 2318 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2319 self._match_text_seq("WITH", "CONNECTION") 2320 return self.expression( 2321 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2322 ) 2323 2324 def _parse_returns(self) -> exp.ReturnsProperty: 2325 value: t.Optional[exp.Expression] 2326 null = None 2327 is_table = self._match(TokenType.TABLE) 2328 2329 if is_table: 2330 if self._match(TokenType.LT): 2331 value = self.expression( 2332 exp.Schema, 2333 this="TABLE", 2334 expressions=self._parse_csv(self._parse_struct_types), 2335 ) 2336 if not self._match(TokenType.GT): 2337 self.raise_error("Expecting >") 2338 else: 2339 value = self._parse_schema(exp.var("TABLE")) 2340 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2341 null = True 2342 value = None 2343 else: 2344 value = self._parse_types() 2345 2346 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2347 2348 def _parse_describe(self) -> exp.Describe: 2349 kind = self._match_set(self.CREATABLES) and self._prev.text 2350 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2351 if self._match(TokenType.DOT): 2352 style = None 2353 self._retreat(self._index - 2) 2354 this = self._parse_table(schema=True) 2355 properties = self._parse_properties() 2356 expressions = properties.expressions if properties else None 2357 return self.expression( 2358 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2359 ) 2360 2361 def _parse_insert(self) -> exp.Insert: 2362 comments = ensure_list(self._prev_comments) 2363 hint = self._parse_hint() 2364 overwrite = self._match(TokenType.OVERWRITE) 2365 ignore = self._match(TokenType.IGNORE) 2366 local = self._match_text_seq("LOCAL") 2367 alternative = None 2368 is_function = None 2369 2370 if self._match_text_seq("DIRECTORY"): 2371 this: t.Optional[exp.Expression] = self.expression( 2372 exp.Directory, 2373 this=self._parse_var_or_string(), 2374 local=local, 2375 row_format=self._parse_row_format(match_row=True), 2376 ) 2377 else: 2378 if self._match(TokenType.OR): 2379 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2380 2381 self._match(TokenType.INTO) 2382 comments += ensure_list(self._prev_comments) 2383 self._match(TokenType.TABLE) 2384 is_function = self._match(TokenType.FUNCTION) 2385 2386 this = ( 2387 self._parse_table(schema=True, parse_partition=True) 2388 if not is_function 2389 else self._parse_function() 2390 ) 2391 2392 returning = self._parse_returning() 2393 2394 return self.expression( 2395 exp.Insert, 2396 comments=comments, 2397 hint=hint, 2398 is_function=is_function, 2399 this=this, 2400 stored=self._match_text_seq("STORED") and self._parse_stored(), 2401 by_name=self._match_text_seq("BY", "NAME"), 2402 exists=self._parse_exists(), 2403 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2404 and self._parse_conjunction(), 2405 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2406 conflict=self._parse_on_conflict(), 2407 returning=returning or self._parse_returning(), 2408 overwrite=overwrite, 2409 alternative=alternative, 2410 ignore=ignore, 2411 ) 2412 2413 def _parse_kill(self) -> exp.Kill: 2414 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2415 2416 return self.expression( 2417 exp.Kill, 2418 this=self._parse_primary(), 2419 kind=kind, 2420 ) 2421 2422 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2423 conflict = self._match_text_seq("ON", "CONFLICT") 2424 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2425 2426 if not conflict and not duplicate: 2427 return None 2428 2429 conflict_keys = None 2430 constraint = None 2431 2432 if conflict: 2433 if self._match_text_seq("ON", "CONSTRAINT"): 2434 constraint = self._parse_id_var() 2435 elif self._match(TokenType.L_PAREN): 2436 conflict_keys = self._parse_csv(self._parse_id_var) 2437 self._match_r_paren() 2438 2439 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2440 if self._prev.token_type == TokenType.UPDATE: 2441 self._match(TokenType.SET) 2442 expressions = self._parse_csv(self._parse_equality) 2443 else: 2444 expressions = None 2445 2446 return self.expression( 2447 exp.OnConflict, 2448 duplicate=duplicate, 2449 expressions=expressions, 2450 action=action, 2451 conflict_keys=conflict_keys, 2452 constraint=constraint, 2453 ) 2454 2455 def _parse_returning(self) -> t.Optional[exp.Returning]: 2456 if not self._match(TokenType.RETURNING): 2457 return None 2458 return self.expression( 2459 exp.Returning, 2460 expressions=self._parse_csv(self._parse_expression), 2461 into=self._match(TokenType.INTO) and self._parse_table_part(), 2462 ) 2463 2464 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2465 if not self._match(TokenType.FORMAT): 2466 return None 2467 return self._parse_row_format() 2468 2469 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2470 index = self._index 2471 with_ = with_ or self._match_text_seq("WITH") 2472 2473 if not self._match(TokenType.SERDE_PROPERTIES): 2474 self._retreat(index) 2475 return None 2476 return self.expression( 2477 exp.SerdeProperties, 2478 **{ # type: ignore 2479 "expressions": self._parse_wrapped_properties(), 2480 "with": with_, 2481 }, 2482 ) 2483 2484 def _parse_row_format( 2485 self, match_row: bool = False 2486 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2487 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2488 return None 2489 2490 if self._match_text_seq("SERDE"): 2491 this = self._parse_string() 2492 2493 serde_properties = self._parse_serde_properties() 2494 2495 return self.expression( 2496 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2497 ) 2498 2499 self._match_text_seq("DELIMITED") 2500 2501 kwargs = {} 2502 2503 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2504 kwargs["fields"] = self._parse_string() 2505 if self._match_text_seq("ESCAPED", "BY"): 2506 kwargs["escaped"] = self._parse_string() 2507 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2508 kwargs["collection_items"] = self._parse_string() 2509 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2510 kwargs["map_keys"] = self._parse_string() 2511 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2512 kwargs["lines"] = self._parse_string() 2513 if self._match_text_seq("NULL", "DEFINED", "AS"): 2514 kwargs["null"] = self._parse_string() 2515 2516 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2517 2518 def _parse_load(self) -> exp.LoadData | exp.Command: 2519 if self._match_text_seq("DATA"): 2520 local = self._match_text_seq("LOCAL") 2521 self._match_text_seq("INPATH") 2522 inpath = self._parse_string() 2523 overwrite = self._match(TokenType.OVERWRITE) 2524 self._match_pair(TokenType.INTO, TokenType.TABLE) 2525 2526 return self.expression( 2527 exp.LoadData, 2528 this=self._parse_table(schema=True), 2529 local=local, 2530 overwrite=overwrite, 2531 inpath=inpath, 2532 partition=self._parse_partition(), 2533 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2534 serde=self._match_text_seq("SERDE") and self._parse_string(), 2535 ) 2536 return self._parse_as_command(self._prev) 2537 2538 def _parse_delete(self) -> exp.Delete: 2539 # This handles MySQL's "Multiple-Table Syntax" 2540 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2541 tables = None 2542 comments = self._prev_comments 2543 if not self._match(TokenType.FROM, advance=False): 2544 tables = self._parse_csv(self._parse_table) or None 2545 2546 returning = self._parse_returning() 2547 2548 return self.expression( 2549 exp.Delete, 2550 comments=comments, 2551 tables=tables, 2552 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2553 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2554 where=self._parse_where(), 2555 returning=returning or self._parse_returning(), 2556 limit=self._parse_limit(), 2557 ) 2558 2559 def _parse_update(self) -> exp.Update: 2560 comments = self._prev_comments 2561 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2562 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2563 returning = self._parse_returning() 2564 return self.expression( 2565 exp.Update, 2566 comments=comments, 2567 **{ # type: ignore 2568 "this": this, 2569 "expressions": expressions, 2570 "from": self._parse_from(joins=True), 2571 "where": self._parse_where(), 2572 "returning": returning or self._parse_returning(), 2573 "order": self._parse_order(), 2574 "limit": self._parse_limit(), 2575 }, 2576 ) 2577 2578 def _parse_uncache(self) -> exp.Uncache: 2579 if not self._match(TokenType.TABLE): 2580 self.raise_error("Expecting TABLE after UNCACHE") 2581 2582 return self.expression( 2583 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2584 ) 2585 2586 def _parse_cache(self) -> exp.Cache: 2587 lazy = self._match_text_seq("LAZY") 2588 self._match(TokenType.TABLE) 2589 table = self._parse_table(schema=True) 2590 2591 options = [] 2592 if self._match_text_seq("OPTIONS"): 2593 self._match_l_paren() 2594 k = self._parse_string() 2595 self._match(TokenType.EQ) 2596 v = self._parse_string() 2597 options = [k, v] 2598 self._match_r_paren() 2599 2600 self._match(TokenType.ALIAS) 2601 return self.expression( 2602 exp.Cache, 2603 this=table, 2604 lazy=lazy, 2605 options=options, 2606 expression=self._parse_select(nested=True), 2607 ) 2608 2609 def _parse_partition(self) -> t.Optional[exp.Partition]: 2610 if not self._match(TokenType.PARTITION): 2611 return None 2612 2613 return self.expression( 2614 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2615 ) 2616 2617 def _parse_value(self) -> t.Optional[exp.Tuple]: 2618 if self._match(TokenType.L_PAREN): 2619 expressions = self._parse_csv(self._parse_expression) 2620 self._match_r_paren() 2621 return self.expression(exp.Tuple, expressions=expressions) 2622 2623 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2624 expression = self._parse_expression() 2625 if expression: 2626 return self.expression(exp.Tuple, expressions=[expression]) 2627 return None 2628 2629 def _parse_projections(self) -> t.List[exp.Expression]: 2630 return self._parse_expressions() 2631 2632 def _parse_select( 2633 self, 2634 nested: bool = False, 2635 table: bool = False, 2636 parse_subquery_alias: bool = True, 2637 parse_set_operation: bool = True, 2638 ) -> t.Optional[exp.Expression]: 2639 cte = self._parse_with() 2640 2641 if cte: 2642 this = self._parse_statement() 2643 2644 if not this: 2645 self.raise_error("Failed to parse any statement following CTE") 2646 return cte 2647 2648 if "with" in this.arg_types: 2649 this.set("with", cte) 2650 else: 2651 self.raise_error(f"{this.key} does not support CTE") 2652 this = cte 2653 2654 return this 2655 2656 # duckdb supports leading with FROM x 2657 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2658 2659 if self._match(TokenType.SELECT): 2660 comments = self._prev_comments 2661 2662 hint = self._parse_hint() 2663 all_ = self._match(TokenType.ALL) 2664 distinct = self._match_set(self.DISTINCT_TOKENS) 2665 2666 kind = ( 2667 self._match(TokenType.ALIAS) 2668 and self._match_texts(("STRUCT", "VALUE")) 2669 and self._prev.text.upper() 2670 ) 2671 2672 if distinct: 2673 distinct = self.expression( 2674 exp.Distinct, 2675 on=self._parse_value() if self._match(TokenType.ON) else None, 2676 ) 2677 2678 if all_ and distinct: 2679 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2680 2681 limit = self._parse_limit(top=True) 2682 projections = self._parse_projections() 2683 2684 this = self.expression( 2685 exp.Select, 2686 kind=kind, 2687 hint=hint, 2688 distinct=distinct, 2689 expressions=projections, 2690 limit=limit, 2691 ) 2692 this.comments = comments 2693 2694 into = self._parse_into() 2695 if into: 2696 this.set("into", into) 2697 2698 if not from_: 2699 from_ = self._parse_from() 2700 2701 if from_: 2702 this.set("from", from_) 2703 2704 this = self._parse_query_modifiers(this) 2705 elif (table or nested) and self._match(TokenType.L_PAREN): 2706 if self._match(TokenType.PIVOT): 2707 this = self._parse_simplified_pivot() 2708 elif self._match(TokenType.FROM): 2709 this = exp.select("*").from_( 2710 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2711 ) 2712 else: 2713 this = ( 2714 self._parse_table() 2715 if table 2716 else self._parse_select(nested=True, parse_set_operation=False) 2717 ) 2718 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2719 2720 self._match_r_paren() 2721 2722 # We return early here so that the UNION isn't attached to the subquery by the 2723 # following call to _parse_set_operations, but instead becomes the parent node 2724 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2725 elif self._match(TokenType.VALUES, advance=False): 2726 this = self._parse_derived_table_values() 2727 elif from_: 2728 this = exp.select("*").from_(from_.this, copy=False) 2729 else: 2730 this = None 2731 2732 if parse_set_operation: 2733 return self._parse_set_operations(this) 2734 return this 2735 2736 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2737 if not skip_with_token and not self._match(TokenType.WITH): 2738 return None 2739 2740 comments = self._prev_comments 2741 recursive = self._match(TokenType.RECURSIVE) 2742 2743 expressions = [] 2744 while True: 2745 expressions.append(self._parse_cte()) 2746 2747 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2748 break 2749 else: 2750 self._match(TokenType.WITH) 2751 2752 return self.expression( 2753 exp.With, comments=comments, expressions=expressions, recursive=recursive 2754 ) 2755 2756 def _parse_cte(self) -> exp.CTE: 2757 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2758 if not alias or not alias.this: 2759 self.raise_error("Expected CTE to have alias") 2760 2761 self._match(TokenType.ALIAS) 2762 2763 if self._match_text_seq("NOT", "MATERIALIZED"): 2764 materialized = False 2765 elif self._match_text_seq("MATERIALIZED"): 2766 materialized = True 2767 else: 2768 materialized = None 2769 2770 return self.expression( 2771 exp.CTE, 2772 this=self._parse_wrapped(self._parse_statement), 2773 alias=alias, 2774 materialized=materialized, 2775 ) 2776 2777 def _parse_table_alias( 2778 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2779 ) -> t.Optional[exp.TableAlias]: 2780 any_token = self._match(TokenType.ALIAS) 2781 alias = ( 2782 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2783 or self._parse_string_as_identifier() 2784 ) 2785 2786 index = self._index 2787 if self._match(TokenType.L_PAREN): 2788 columns = self._parse_csv(self._parse_function_parameter) 2789 self._match_r_paren() if columns else self._retreat(index) 2790 else: 2791 columns = None 2792 2793 if not alias and not columns: 2794 return None 2795 2796 return self.expression(exp.TableAlias, this=alias, columns=columns) 2797 2798 def _parse_subquery( 2799 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2800 ) -> t.Optional[exp.Subquery]: 2801 if not this: 2802 return None 2803 2804 return self.expression( 2805 exp.Subquery, 2806 this=this, 2807 pivots=self._parse_pivots(), 2808 alias=self._parse_table_alias() if parse_alias else None, 2809 ) 2810 2811 def _implicit_unnests_to_explicit(self, this: E) -> E: 2812 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2813 2814 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2815 for i, join in enumerate(this.args.get("joins") or []): 2816 table = join.this 2817 normalized_table = table.copy() 2818 normalized_table.meta["maybe_column"] = True 2819 normalized_table = _norm(normalized_table, dialect=self.dialect) 2820 2821 if isinstance(table, exp.Table) and not join.args.get("on"): 2822 if normalized_table.parts[0].name in refs: 2823 table_as_column = table.to_column() 2824 unnest = exp.Unnest(expressions=[table_as_column]) 2825 2826 # Table.to_column creates a parent Alias node that we want to convert to 2827 # a TableAlias and attach to the Unnest, so it matches the parser's output 2828 if isinstance(table.args.get("alias"), exp.TableAlias): 2829 table_as_column.replace(table_as_column.this) 2830 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2831 2832 table.replace(unnest) 2833 2834 refs.add(normalized_table.alias_or_name) 2835 2836 return this 2837 2838 def _parse_query_modifiers( 2839 self, this: t.Optional[exp.Expression] 2840 ) -> t.Optional[exp.Expression]: 2841 if isinstance(this, (exp.Query, exp.Table)): 2842 for join in self._parse_joins(): 2843 this.append("joins", join) 2844 for lateral in iter(self._parse_lateral, None): 2845 this.append("laterals", lateral) 2846 2847 while True: 2848 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2849 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2850 key, expression = parser(self) 2851 2852 if expression: 2853 this.set(key, expression) 2854 if key == "limit": 2855 offset = expression.args.pop("offset", None) 2856 2857 if offset: 2858 offset = exp.Offset(expression=offset) 2859 this.set("offset", offset) 2860 2861 limit_by_expressions = expression.expressions 2862 expression.set("expressions", None) 2863 offset.set("expressions", limit_by_expressions) 2864 continue 2865 break 2866 2867 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2868 this = self._implicit_unnests_to_explicit(this) 2869 2870 return this 2871 2872 def _parse_hint(self) -> t.Optional[exp.Hint]: 2873 if self._match(TokenType.HINT): 2874 hints = [] 2875 for hint in iter( 2876 lambda: self._parse_csv( 2877 lambda: self._parse_function() or self._parse_var(upper=True) 2878 ), 2879 [], 2880 ): 2881 hints.extend(hint) 2882 2883 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2884 self.raise_error("Expected */ after HINT") 2885 2886 return self.expression(exp.Hint, expressions=hints) 2887 2888 return None 2889 2890 def _parse_into(self) -> t.Optional[exp.Into]: 2891 if not self._match(TokenType.INTO): 2892 return None 2893 2894 temp = self._match(TokenType.TEMPORARY) 2895 unlogged = self._match_text_seq("UNLOGGED") 2896 self._match(TokenType.TABLE) 2897 2898 return self.expression( 2899 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2900 ) 2901 2902 def _parse_from( 2903 self, joins: bool = False, skip_from_token: bool = False 2904 ) -> t.Optional[exp.From]: 2905 if not skip_from_token and not self._match(TokenType.FROM): 2906 return None 2907 2908 return self.expression( 2909 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2910 ) 2911 2912 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2913 return self.expression( 2914 exp.MatchRecognizeMeasure, 2915 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2916 this=self._parse_expression(), 2917 ) 2918 2919 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2920 if not self._match(TokenType.MATCH_RECOGNIZE): 2921 return None 2922 2923 self._match_l_paren() 2924 2925 partition = self._parse_partition_by() 2926 order = self._parse_order() 2927 2928 measures = ( 2929 self._parse_csv(self._parse_match_recognize_measure) 2930 if self._match_text_seq("MEASURES") 2931 else None 2932 ) 2933 2934 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2935 rows = exp.var("ONE ROW PER MATCH") 2936 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2937 text = "ALL ROWS PER MATCH" 2938 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2939 text += " SHOW EMPTY MATCHES" 2940 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2941 text += " OMIT EMPTY MATCHES" 2942 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2943 text += " WITH UNMATCHED ROWS" 2944 rows = exp.var(text) 2945 else: 2946 rows = None 2947 2948 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2949 text = "AFTER MATCH SKIP" 2950 if self._match_text_seq("PAST", "LAST", "ROW"): 2951 text += " PAST LAST ROW" 2952 elif self._match_text_seq("TO", "NEXT", "ROW"): 2953 text += " TO NEXT ROW" 2954 elif self._match_text_seq("TO", "FIRST"): 2955 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2956 elif self._match_text_seq("TO", "LAST"): 2957 text += f" TO LAST {self._advance_any().text}" # type: ignore 2958 after = exp.var(text) 2959 else: 2960 after = None 2961 2962 if self._match_text_seq("PATTERN"): 2963 self._match_l_paren() 2964 2965 if not self._curr: 2966 self.raise_error("Expecting )", self._curr) 2967 2968 paren = 1 2969 start = self._curr 2970 2971 while self._curr and paren > 0: 2972 if self._curr.token_type == TokenType.L_PAREN: 2973 paren += 1 2974 if self._curr.token_type == TokenType.R_PAREN: 2975 paren -= 1 2976 2977 end = self._prev 2978 self._advance() 2979 2980 if paren > 0: 2981 self.raise_error("Expecting )", self._curr) 2982 2983 pattern = exp.var(self._find_sql(start, end)) 2984 else: 2985 pattern = None 2986 2987 define = ( 2988 self._parse_csv(self._parse_name_as_expression) 2989 if self._match_text_seq("DEFINE") 2990 else None 2991 ) 2992 2993 self._match_r_paren() 2994 2995 return self.expression( 2996 exp.MatchRecognize, 2997 partition_by=partition, 2998 order=order, 2999 measures=measures, 3000 rows=rows, 3001 after=after, 3002 pattern=pattern, 3003 define=define, 3004 alias=self._parse_table_alias(), 3005 ) 3006 3007 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3008 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3009 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3010 cross_apply = False 3011 3012 if cross_apply is not None: 3013 this = self._parse_select(table=True) 3014 view = None 3015 outer = None 3016 elif self._match(TokenType.LATERAL): 3017 this = self._parse_select(table=True) 3018 view = self._match(TokenType.VIEW) 3019 outer = self._match(TokenType.OUTER) 3020 else: 3021 return None 3022 3023 if not this: 3024 this = ( 3025 self._parse_unnest() 3026 or self._parse_function() 3027 or self._parse_id_var(any_token=False) 3028 ) 3029 3030 while self._match(TokenType.DOT): 3031 this = exp.Dot( 3032 this=this, 3033 expression=self._parse_function() or self._parse_id_var(any_token=False), 3034 ) 3035 3036 if view: 3037 table = self._parse_id_var(any_token=False) 3038 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3039 table_alias: t.Optional[exp.TableAlias] = self.expression( 3040 exp.TableAlias, this=table, columns=columns 3041 ) 3042 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3043 # We move the alias from the lateral's child node to the lateral itself 3044 table_alias = this.args["alias"].pop() 3045 else: 3046 table_alias = self._parse_table_alias() 3047 3048 return self.expression( 3049 exp.Lateral, 3050 this=this, 3051 view=view, 3052 outer=outer, 3053 alias=table_alias, 3054 cross_apply=cross_apply, 3055 ) 3056 3057 def _parse_join_parts( 3058 self, 3059 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3060 return ( 3061 self._match_set(self.JOIN_METHODS) and self._prev, 3062 self._match_set(self.JOIN_SIDES) and self._prev, 3063 self._match_set(self.JOIN_KINDS) and self._prev, 3064 ) 3065 3066 def _parse_join( 3067 self, skip_join_token: bool = False, parse_bracket: bool = False 3068 ) -> t.Optional[exp.Join]: 3069 if self._match(TokenType.COMMA): 3070 return self.expression(exp.Join, this=self._parse_table()) 3071 3072 index = self._index 3073 method, side, kind = self._parse_join_parts() 3074 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3075 join = self._match(TokenType.JOIN) 3076 3077 if not skip_join_token and not join: 3078 self._retreat(index) 3079 kind = None 3080 method = None 3081 side = None 3082 3083 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3084 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3085 3086 if not skip_join_token and not join and not outer_apply and not cross_apply: 3087 return None 3088 3089 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3090 3091 if method: 3092 kwargs["method"] = method.text 3093 if side: 3094 kwargs["side"] = side.text 3095 if kind: 3096 kwargs["kind"] = kind.text 3097 if hint: 3098 kwargs["hint"] = hint 3099 3100 if self._match(TokenType.MATCH_CONDITION): 3101 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3102 3103 if self._match(TokenType.ON): 3104 kwargs["on"] = self._parse_conjunction() 3105 elif self._match(TokenType.USING): 3106 kwargs["using"] = self._parse_wrapped_id_vars() 3107 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3108 kind and kind.token_type == TokenType.CROSS 3109 ): 3110 index = self._index 3111 joins: t.Optional[list] = list(self._parse_joins()) 3112 3113 if joins and self._match(TokenType.ON): 3114 kwargs["on"] = self._parse_conjunction() 3115 elif joins and self._match(TokenType.USING): 3116 kwargs["using"] = self._parse_wrapped_id_vars() 3117 else: 3118 joins = None 3119 self._retreat(index) 3120 3121 kwargs["this"].set("joins", joins if joins else None) 3122 3123 comments = [c for token in (method, side, kind) if token for c in token.comments] 3124 return self.expression(exp.Join, comments=comments, **kwargs) 3125 3126 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3127 this = self._parse_conjunction() 3128 3129 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3130 return this 3131 3132 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3133 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3134 3135 return this 3136 3137 def _parse_index_params(self) -> exp.IndexParameters: 3138 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3139 3140 if self._match(TokenType.L_PAREN, advance=False): 3141 columns = self._parse_wrapped_csv(self._parse_with_operator) 3142 else: 3143 columns = None 3144 3145 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3146 partition_by = self._parse_partition_by() 3147 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3148 tablespace = ( 3149 self._parse_var(any_token=True) 3150 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3151 else None 3152 ) 3153 where = self._parse_where() 3154 3155 return self.expression( 3156 exp.IndexParameters, 3157 using=using, 3158 columns=columns, 3159 include=include, 3160 partition_by=partition_by, 3161 where=where, 3162 with_storage=with_storage, 3163 tablespace=tablespace, 3164 ) 3165 3166 def _parse_index( 3167 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3168 ) -> t.Optional[exp.Index]: 3169 if index or anonymous: 3170 unique = None 3171 primary = None 3172 amp = None 3173 3174 self._match(TokenType.ON) 3175 self._match(TokenType.TABLE) # hive 3176 table = self._parse_table_parts(schema=True) 3177 else: 3178 unique = self._match(TokenType.UNIQUE) 3179 primary = self._match_text_seq("PRIMARY") 3180 amp = self._match_text_seq("AMP") 3181 3182 if not self._match(TokenType.INDEX): 3183 return None 3184 3185 index = self._parse_id_var() 3186 table = None 3187 3188 params = self._parse_index_params() 3189 3190 return self.expression( 3191 exp.Index, 3192 this=index, 3193 table=table, 3194 unique=unique, 3195 primary=primary, 3196 amp=amp, 3197 params=params, 3198 ) 3199 3200 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3201 hints: t.List[exp.Expression] = [] 3202 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3203 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3204 hints.append( 3205 self.expression( 3206 exp.WithTableHint, 3207 expressions=self._parse_csv( 3208 lambda: self._parse_function() or self._parse_var(any_token=True) 3209 ), 3210 ) 3211 ) 3212 self._match_r_paren() 3213 else: 3214 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3215 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3216 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3217 3218 self._match_texts(("INDEX", "KEY")) 3219 if self._match(TokenType.FOR): 3220 hint.set("target", self._advance_any() and self._prev.text.upper()) 3221 3222 hint.set("expressions", self._parse_wrapped_id_vars()) 3223 hints.append(hint) 3224 3225 return hints or None 3226 3227 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3228 return ( 3229 (not schema and self._parse_function(optional_parens=False)) 3230 or self._parse_id_var(any_token=False) 3231 or self._parse_string_as_identifier() 3232 or self._parse_placeholder() 3233 ) 3234 3235 def _parse_table_parts( 3236 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3237 ) -> exp.Table: 3238 catalog = None 3239 db = None 3240 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3241 3242 while self._match(TokenType.DOT): 3243 if catalog: 3244 # This allows nesting the table in arbitrarily many dot expressions if needed 3245 table = self.expression( 3246 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3247 ) 3248 else: 3249 catalog = db 3250 db = table 3251 # "" used for tsql FROM a..b case 3252 table = self._parse_table_part(schema=schema) or "" 3253 3254 if ( 3255 wildcard 3256 and self._is_connected() 3257 and (isinstance(table, exp.Identifier) or not table) 3258 and self._match(TokenType.STAR) 3259 ): 3260 if isinstance(table, exp.Identifier): 3261 table.args["this"] += "*" 3262 else: 3263 table = exp.Identifier(this="*") 3264 3265 # We bubble up comments from the Identifier to the Table 3266 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3267 3268 if is_db_reference: 3269 catalog = db 3270 db = table 3271 table = None 3272 3273 if not table and not is_db_reference: 3274 self.raise_error(f"Expected table name but got {self._curr}") 3275 if not db and is_db_reference: 3276 self.raise_error(f"Expected database name but got {self._curr}") 3277 3278 return self.expression( 3279 exp.Table, 3280 comments=comments, 3281 this=table, 3282 db=db, 3283 catalog=catalog, 3284 pivots=self._parse_pivots(), 3285 ) 3286 3287 def _parse_table( 3288 self, 3289 schema: bool = False, 3290 joins: bool = False, 3291 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3292 parse_bracket: bool = False, 3293 is_db_reference: bool = False, 3294 parse_partition: bool = False, 3295 ) -> t.Optional[exp.Expression]: 3296 lateral = self._parse_lateral() 3297 if lateral: 3298 return lateral 3299 3300 unnest = self._parse_unnest() 3301 if unnest: 3302 return unnest 3303 3304 values = self._parse_derived_table_values() 3305 if values: 3306 return values 3307 3308 subquery = self._parse_select(table=True) 3309 if subquery: 3310 if not subquery.args.get("pivots"): 3311 subquery.set("pivots", self._parse_pivots()) 3312 return subquery 3313 3314 bracket = parse_bracket and self._parse_bracket(None) 3315 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3316 3317 only = self._match(TokenType.ONLY) 3318 3319 this = t.cast( 3320 exp.Expression, 3321 bracket 3322 or self._parse_bracket( 3323 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3324 ), 3325 ) 3326 3327 if only: 3328 this.set("only", only) 3329 3330 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3331 self._match_text_seq("*") 3332 3333 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3334 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3335 this.set("partition", self._parse_partition()) 3336 3337 if schema: 3338 return self._parse_schema(this=this) 3339 3340 version = self._parse_version() 3341 3342 if version: 3343 this.set("version", version) 3344 3345 if self.dialect.ALIAS_POST_TABLESAMPLE: 3346 table_sample = self._parse_table_sample() 3347 3348 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3349 if alias: 3350 this.set("alias", alias) 3351 3352 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3353 return self.expression( 3354 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3355 ) 3356 3357 this.set("hints", self._parse_table_hints()) 3358 3359 if not this.args.get("pivots"): 3360 this.set("pivots", self._parse_pivots()) 3361 3362 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3363 table_sample = self._parse_table_sample() 3364 3365 if table_sample: 3366 table_sample.set("this", this) 3367 this = table_sample 3368 3369 if joins: 3370 for join in self._parse_joins(): 3371 this.append("joins", join) 3372 3373 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3374 this.set("ordinality", True) 3375 this.set("alias", self._parse_table_alias()) 3376 3377 return this 3378 3379 def _parse_version(self) -> t.Optional[exp.Version]: 3380 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3381 this = "TIMESTAMP" 3382 elif self._match(TokenType.VERSION_SNAPSHOT): 3383 this = "VERSION" 3384 else: 3385 return None 3386 3387 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3388 kind = self._prev.text.upper() 3389 start = self._parse_bitwise() 3390 self._match_texts(("TO", "AND")) 3391 end = self._parse_bitwise() 3392 expression: t.Optional[exp.Expression] = self.expression( 3393 exp.Tuple, expressions=[start, end] 3394 ) 3395 elif self._match_text_seq("CONTAINED", "IN"): 3396 kind = "CONTAINED IN" 3397 expression = self.expression( 3398 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3399 ) 3400 elif self._match(TokenType.ALL): 3401 kind = "ALL" 3402 expression = None 3403 else: 3404 self._match_text_seq("AS", "OF") 3405 kind = "AS OF" 3406 expression = self._parse_type() 3407 3408 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3409 3410 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3411 if not self._match(TokenType.UNNEST): 3412 return None 3413 3414 expressions = self._parse_wrapped_csv(self._parse_equality) 3415 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3416 3417 alias = self._parse_table_alias() if with_alias else None 3418 3419 if alias: 3420 if self.dialect.UNNEST_COLUMN_ONLY: 3421 if alias.args.get("columns"): 3422 self.raise_error("Unexpected extra column alias in unnest.") 3423 3424 alias.set("columns", [alias.this]) 3425 alias.set("this", None) 3426 3427 columns = alias.args.get("columns") or [] 3428 if offset and len(expressions) < len(columns): 3429 offset = columns.pop() 3430 3431 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3432 self._match(TokenType.ALIAS) 3433 offset = self._parse_id_var( 3434 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3435 ) or exp.to_identifier("offset") 3436 3437 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3438 3439 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3440 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3441 if not is_derived and not self._match_text_seq("VALUES"): 3442 return None 3443 3444 expressions = self._parse_csv(self._parse_value) 3445 alias = self._parse_table_alias() 3446 3447 if is_derived: 3448 self._match_r_paren() 3449 3450 return self.expression( 3451 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3452 ) 3453 3454 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3455 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3456 as_modifier and self._match_text_seq("USING", "SAMPLE") 3457 ): 3458 return None 3459 3460 bucket_numerator = None 3461 bucket_denominator = None 3462 bucket_field = None 3463 percent = None 3464 size = None 3465 seed = None 3466 3467 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3468 matched_l_paren = self._match(TokenType.L_PAREN) 3469 3470 if self.TABLESAMPLE_CSV: 3471 num = None 3472 expressions = self._parse_csv(self._parse_primary) 3473 else: 3474 expressions = None 3475 num = ( 3476 self._parse_factor() 3477 if self._match(TokenType.NUMBER, advance=False) 3478 else self._parse_primary() or self._parse_placeholder() 3479 ) 3480 3481 if self._match_text_seq("BUCKET"): 3482 bucket_numerator = self._parse_number() 3483 self._match_text_seq("OUT", "OF") 3484 bucket_denominator = bucket_denominator = self._parse_number() 3485 self._match(TokenType.ON) 3486 bucket_field = self._parse_field() 3487 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3488 percent = num 3489 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3490 size = num 3491 else: 3492 percent = num 3493 3494 if matched_l_paren: 3495 self._match_r_paren() 3496 3497 if self._match(TokenType.L_PAREN): 3498 method = self._parse_var(upper=True) 3499 seed = self._match(TokenType.COMMA) and self._parse_number() 3500 self._match_r_paren() 3501 elif self._match_texts(("SEED", "REPEATABLE")): 3502 seed = self._parse_wrapped(self._parse_number) 3503 3504 if not method and self.DEFAULT_SAMPLING_METHOD: 3505 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3506 3507 return self.expression( 3508 exp.TableSample, 3509 expressions=expressions, 3510 method=method, 3511 bucket_numerator=bucket_numerator, 3512 bucket_denominator=bucket_denominator, 3513 bucket_field=bucket_field, 3514 percent=percent, 3515 size=size, 3516 seed=seed, 3517 ) 3518 3519 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3520 return list(iter(self._parse_pivot, None)) or None 3521 3522 def _parse_joins(self) -> t.Iterator[exp.Join]: 3523 return iter(self._parse_join, None) 3524 3525 # https://duckdb.org/docs/sql/statements/pivot 3526 def _parse_simplified_pivot(self) -> exp.Pivot: 3527 def _parse_on() -> t.Optional[exp.Expression]: 3528 this = self._parse_bitwise() 3529 return self._parse_in(this) if self._match(TokenType.IN) else this 3530 3531 this = self._parse_table() 3532 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3533 using = self._match(TokenType.USING) and self._parse_csv( 3534 lambda: self._parse_alias(self._parse_function()) 3535 ) 3536 group = self._parse_group() 3537 return self.expression( 3538 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3539 ) 3540 3541 def _parse_pivot_in(self) -> exp.In: 3542 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3543 this = self._parse_conjunction() 3544 3545 self._match(TokenType.ALIAS) 3546 alias = self._parse_field() 3547 if alias: 3548 return self.expression(exp.PivotAlias, this=this, alias=alias) 3549 3550 return this 3551 3552 value = self._parse_column() 3553 3554 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3555 self.raise_error("Expecting IN (") 3556 3557 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3558 3559 self._match_r_paren() 3560 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3561 3562 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3563 index = self._index 3564 include_nulls = None 3565 3566 if self._match(TokenType.PIVOT): 3567 unpivot = False 3568 elif self._match(TokenType.UNPIVOT): 3569 unpivot = True 3570 3571 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3572 if self._match_text_seq("INCLUDE", "NULLS"): 3573 include_nulls = True 3574 elif self._match_text_seq("EXCLUDE", "NULLS"): 3575 include_nulls = False 3576 else: 3577 return None 3578 3579 expressions = [] 3580 3581 if not self._match(TokenType.L_PAREN): 3582 self._retreat(index) 3583 return None 3584 3585 if unpivot: 3586 expressions = self._parse_csv(self._parse_column) 3587 else: 3588 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3589 3590 if not expressions: 3591 self.raise_error("Failed to parse PIVOT's aggregation list") 3592 3593 if not self._match(TokenType.FOR): 3594 self.raise_error("Expecting FOR") 3595 3596 field = self._parse_pivot_in() 3597 3598 self._match_r_paren() 3599 3600 pivot = self.expression( 3601 exp.Pivot, 3602 expressions=expressions, 3603 field=field, 3604 unpivot=unpivot, 3605 include_nulls=include_nulls, 3606 ) 3607 3608 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3609 pivot.set("alias", self._parse_table_alias()) 3610 3611 if not unpivot: 3612 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3613 3614 columns: t.List[exp.Expression] = [] 3615 for fld in pivot.args["field"].expressions: 3616 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3617 for name in names: 3618 if self.PREFIXED_PIVOT_COLUMNS: 3619 name = f"{name}_{field_name}" if name else field_name 3620 else: 3621 name = f"{field_name}_{name}" if name else field_name 3622 3623 columns.append(exp.to_identifier(name)) 3624 3625 pivot.set("columns", columns) 3626 3627 return pivot 3628 3629 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3630 return [agg.alias for agg in aggregations] 3631 3632 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3633 if not skip_where_token and not self._match(TokenType.PREWHERE): 3634 return None 3635 3636 return self.expression( 3637 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3638 ) 3639 3640 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3641 if not skip_where_token and not self._match(TokenType.WHERE): 3642 return None 3643 3644 return self.expression( 3645 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3646 ) 3647 3648 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3649 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3650 return None 3651 3652 elements: t.Dict[str, t.Any] = defaultdict(list) 3653 3654 if self._match(TokenType.ALL): 3655 elements["all"] = True 3656 elif self._match(TokenType.DISTINCT): 3657 elements["all"] = False 3658 3659 while True: 3660 expressions = self._parse_csv( 3661 lambda: None 3662 if self._match(TokenType.ROLLUP, advance=False) 3663 else self._parse_conjunction() 3664 ) 3665 if expressions: 3666 elements["expressions"].extend(expressions) 3667 3668 grouping_sets = self._parse_grouping_sets() 3669 if grouping_sets: 3670 elements["grouping_sets"].extend(grouping_sets) 3671 3672 rollup = None 3673 cube = None 3674 totals = None 3675 3676 index = self._index 3677 with_ = self._match(TokenType.WITH) 3678 if self._match(TokenType.ROLLUP): 3679 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3680 elements["rollup"].extend(ensure_list(rollup)) 3681 3682 if self._match(TokenType.CUBE): 3683 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3684 elements["cube"].extend(ensure_list(cube)) 3685 3686 if self._match_text_seq("TOTALS"): 3687 totals = True 3688 elements["totals"] = True # type: ignore 3689 3690 if not (grouping_sets or rollup or cube or totals): 3691 if with_: 3692 self._retreat(index) 3693 break 3694 3695 return self.expression(exp.Group, **elements) # type: ignore 3696 3697 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3698 if not self._match(TokenType.GROUPING_SETS): 3699 return None 3700 3701 return self._parse_wrapped_csv(self._parse_grouping_set) 3702 3703 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3704 if self._match(TokenType.L_PAREN): 3705 grouping_set = self._parse_csv(self._parse_column) 3706 self._match_r_paren() 3707 return self.expression(exp.Tuple, expressions=grouping_set) 3708 3709 return self._parse_column() 3710 3711 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3712 if not skip_having_token and not self._match(TokenType.HAVING): 3713 return None 3714 return self.expression(exp.Having, this=self._parse_conjunction()) 3715 3716 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3717 if not self._match(TokenType.QUALIFY): 3718 return None 3719 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3720 3721 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3722 if skip_start_token: 3723 start = None 3724 elif self._match(TokenType.START_WITH): 3725 start = self._parse_conjunction() 3726 else: 3727 return None 3728 3729 self._match(TokenType.CONNECT_BY) 3730 nocycle = self._match_text_seq("NOCYCLE") 3731 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3732 exp.Prior, this=self._parse_bitwise() 3733 ) 3734 connect = self._parse_conjunction() 3735 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3736 3737 if not start and self._match(TokenType.START_WITH): 3738 start = self._parse_conjunction() 3739 3740 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3741 3742 def _parse_name_as_expression(self) -> exp.Alias: 3743 return self.expression( 3744 exp.Alias, 3745 alias=self._parse_id_var(any_token=True), 3746 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3747 ) 3748 3749 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3750 if self._match_text_seq("INTERPOLATE"): 3751 return self._parse_wrapped_csv(self._parse_name_as_expression) 3752 return None 3753 3754 def _parse_order( 3755 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3756 ) -> t.Optional[exp.Expression]: 3757 siblings = None 3758 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3759 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3760 return this 3761 3762 siblings = True 3763 3764 return self.expression( 3765 exp.Order, 3766 this=this, 3767 expressions=self._parse_csv(self._parse_ordered), 3768 interpolate=self._parse_interpolate(), 3769 siblings=siblings, 3770 ) 3771 3772 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3773 if not self._match(token): 3774 return None 3775 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3776 3777 def _parse_ordered( 3778 self, parse_method: t.Optional[t.Callable] = None 3779 ) -> t.Optional[exp.Ordered]: 3780 this = parse_method() if parse_method else self._parse_conjunction() 3781 if not this: 3782 return None 3783 3784 asc = self._match(TokenType.ASC) 3785 desc = self._match(TokenType.DESC) or (asc and False) 3786 3787 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3788 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3789 3790 nulls_first = is_nulls_first or False 3791 explicitly_null_ordered = is_nulls_first or is_nulls_last 3792 3793 if ( 3794 not explicitly_null_ordered 3795 and ( 3796 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3797 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3798 ) 3799 and self.dialect.NULL_ORDERING != "nulls_are_last" 3800 ): 3801 nulls_first = True 3802 3803 if self._match_text_seq("WITH", "FILL"): 3804 with_fill = self.expression( 3805 exp.WithFill, 3806 **{ # type: ignore 3807 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3808 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3809 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3810 }, 3811 ) 3812 else: 3813 with_fill = None 3814 3815 return self.expression( 3816 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3817 ) 3818 3819 def _parse_limit( 3820 self, 3821 this: t.Optional[exp.Expression] = None, 3822 top: bool = False, 3823 skip_limit_token: bool = False, 3824 ) -> t.Optional[exp.Expression]: 3825 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3826 comments = self._prev_comments 3827 if top: 3828 limit_paren = self._match(TokenType.L_PAREN) 3829 expression = self._parse_term() if limit_paren else self._parse_number() 3830 3831 if limit_paren: 3832 self._match_r_paren() 3833 else: 3834 expression = self._parse_term() 3835 3836 if self._match(TokenType.COMMA): 3837 offset = expression 3838 expression = self._parse_term() 3839 else: 3840 offset = None 3841 3842 limit_exp = self.expression( 3843 exp.Limit, 3844 this=this, 3845 expression=expression, 3846 offset=offset, 3847 comments=comments, 3848 expressions=self._parse_limit_by(), 3849 ) 3850 3851 return limit_exp 3852 3853 if self._match(TokenType.FETCH): 3854 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3855 direction = self._prev.text.upper() if direction else "FIRST" 3856 3857 count = self._parse_field(tokens=self.FETCH_TOKENS) 3858 percent = self._match(TokenType.PERCENT) 3859 3860 self._match_set((TokenType.ROW, TokenType.ROWS)) 3861 3862 only = self._match_text_seq("ONLY") 3863 with_ties = self._match_text_seq("WITH", "TIES") 3864 3865 if only and with_ties: 3866 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3867 3868 return self.expression( 3869 exp.Fetch, 3870 direction=direction, 3871 count=count, 3872 percent=percent, 3873 with_ties=with_ties, 3874 ) 3875 3876 return this 3877 3878 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3879 if not self._match(TokenType.OFFSET): 3880 return this 3881 3882 count = self._parse_term() 3883 self._match_set((TokenType.ROW, TokenType.ROWS)) 3884 3885 return self.expression( 3886 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3887 ) 3888 3889 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3890 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3891 3892 def _parse_locks(self) -> t.List[exp.Lock]: 3893 locks = [] 3894 while True: 3895 if self._match_text_seq("FOR", "UPDATE"): 3896 update = True 3897 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3898 "LOCK", "IN", "SHARE", "MODE" 3899 ): 3900 update = False 3901 else: 3902 break 3903 3904 expressions = None 3905 if self._match_text_seq("OF"): 3906 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3907 3908 wait: t.Optional[bool | exp.Expression] = None 3909 if self._match_text_seq("NOWAIT"): 3910 wait = True 3911 elif self._match_text_seq("WAIT"): 3912 wait = self._parse_primary() 3913 elif self._match_text_seq("SKIP", "LOCKED"): 3914 wait = False 3915 3916 locks.append( 3917 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3918 ) 3919 3920 return locks 3921 3922 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3923 while this and self._match_set(self.SET_OPERATIONS): 3924 token_type = self._prev.token_type 3925 3926 if token_type == TokenType.UNION: 3927 operation = exp.Union 3928 elif token_type == TokenType.EXCEPT: 3929 operation = exp.Except 3930 else: 3931 operation = exp.Intersect 3932 3933 comments = self._prev.comments 3934 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3935 by_name = self._match_text_seq("BY", "NAME") 3936 expression = self._parse_select(nested=True, parse_set_operation=False) 3937 3938 this = self.expression( 3939 operation, 3940 comments=comments, 3941 this=this, 3942 distinct=distinct, 3943 by_name=by_name, 3944 expression=expression, 3945 ) 3946 3947 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3948 expression = this.expression 3949 3950 if expression: 3951 for arg in self.UNION_MODIFIERS: 3952 expr = expression.args.get(arg) 3953 if expr: 3954 this.set(arg, expr.pop()) 3955 3956 return this 3957 3958 def _parse_expression(self) -> t.Optional[exp.Expression]: 3959 return self._parse_alias(self._parse_conjunction()) 3960 3961 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3962 this = self._parse_equality() 3963 3964 if self._match(TokenType.COLON_EQ): 3965 this = self.expression( 3966 exp.PropertyEQ, 3967 this=this, 3968 comments=self._prev_comments, 3969 expression=self._parse_conjunction(), 3970 ) 3971 3972 while self._match_set(self.CONJUNCTION): 3973 this = self.expression( 3974 self.CONJUNCTION[self._prev.token_type], 3975 this=this, 3976 comments=self._prev_comments, 3977 expression=self._parse_equality(), 3978 ) 3979 return this 3980 3981 def _parse_equality(self) -> t.Optional[exp.Expression]: 3982 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3983 3984 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3985 return self._parse_tokens(self._parse_range, self.COMPARISON) 3986 3987 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3988 this = this or self._parse_bitwise() 3989 negate = self._match(TokenType.NOT) 3990 3991 if self._match_set(self.RANGE_PARSERS): 3992 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3993 if not expression: 3994 return this 3995 3996 this = expression 3997 elif self._match(TokenType.ISNULL): 3998 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3999 4000 # Postgres supports ISNULL and NOTNULL for conditions. 4001 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4002 if self._match(TokenType.NOTNULL): 4003 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4004 this = self.expression(exp.Not, this=this) 4005 4006 if negate: 4007 this = self.expression(exp.Not, this=this) 4008 4009 if self._match(TokenType.IS): 4010 this = self._parse_is(this) 4011 4012 return this 4013 4014 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4015 index = self._index - 1 4016 negate = self._match(TokenType.NOT) 4017 4018 if self._match_text_seq("DISTINCT", "FROM"): 4019 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4020 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4021 4022 expression = self._parse_null() or self._parse_boolean() 4023 if not expression: 4024 self._retreat(index) 4025 return None 4026 4027 this = self.expression(exp.Is, this=this, expression=expression) 4028 return self.expression(exp.Not, this=this) if negate else this 4029 4030 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4031 unnest = self._parse_unnest(with_alias=False) 4032 if unnest: 4033 this = self.expression(exp.In, this=this, unnest=unnest) 4034 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4035 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4036 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4037 4038 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4039 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4040 else: 4041 this = self.expression(exp.In, this=this, expressions=expressions) 4042 4043 if matched_l_paren: 4044 self._match_r_paren(this) 4045 elif not self._match(TokenType.R_BRACKET, expression=this): 4046 self.raise_error("Expecting ]") 4047 else: 4048 this = self.expression(exp.In, this=this, field=self._parse_field()) 4049 4050 return this 4051 4052 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4053 low = self._parse_bitwise() 4054 self._match(TokenType.AND) 4055 high = self._parse_bitwise() 4056 return self.expression(exp.Between, this=this, low=low, high=high) 4057 4058 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4059 if not self._match(TokenType.ESCAPE): 4060 return this 4061 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4062 4063 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 4064 index = self._index 4065 4066 if not self._match(TokenType.INTERVAL) and match_interval: 4067 return None 4068 4069 if self._match(TokenType.STRING, advance=False): 4070 this = self._parse_primary() 4071 else: 4072 this = self._parse_term() 4073 4074 if not this or ( 4075 isinstance(this, exp.Column) 4076 and not this.table 4077 and not this.this.quoted 4078 and this.name.upper() == "IS" 4079 ): 4080 self._retreat(index) 4081 return None 4082 4083 unit = self._parse_function() or ( 4084 not self._match(TokenType.ALIAS, advance=False) 4085 and self._parse_var(any_token=True, upper=True) 4086 ) 4087 4088 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4089 # each INTERVAL expression into this canonical form so it's easy to transpile 4090 if this and this.is_number: 4091 this = exp.Literal.string(this.name) 4092 elif this and this.is_string: 4093 parts = this.name.split() 4094 4095 if len(parts) == 2: 4096 if unit: 4097 # This is not actually a unit, it's something else (e.g. a "window side") 4098 unit = None 4099 self._retreat(self._index - 1) 4100 4101 this = exp.Literal.string(parts[0]) 4102 unit = self.expression(exp.Var, this=parts[1].upper()) 4103 4104 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4105 unit = self.expression( 4106 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4107 ) 4108 4109 return self.expression(exp.Interval, this=this, unit=unit) 4110 4111 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4112 this = self._parse_term() 4113 4114 while True: 4115 if self._match_set(self.BITWISE): 4116 this = self.expression( 4117 self.BITWISE[self._prev.token_type], 4118 this=this, 4119 expression=self._parse_term(), 4120 ) 4121 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4122 this = self.expression( 4123 exp.DPipe, 4124 this=this, 4125 expression=self._parse_term(), 4126 safe=not self.dialect.STRICT_STRING_CONCAT, 4127 ) 4128 elif self._match(TokenType.DQMARK): 4129 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4130 elif self._match_pair(TokenType.LT, TokenType.LT): 4131 this = self.expression( 4132 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4133 ) 4134 elif self._match_pair(TokenType.GT, TokenType.GT): 4135 this = self.expression( 4136 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4137 ) 4138 else: 4139 break 4140 4141 return this 4142 4143 def _parse_term(self) -> t.Optional[exp.Expression]: 4144 return self._parse_tokens(self._parse_factor, self.TERM) 4145 4146 def _parse_factor(self) -> t.Optional[exp.Expression]: 4147 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4148 this = parse_method() 4149 4150 while self._match_set(self.FACTOR): 4151 this = self.expression( 4152 self.FACTOR[self._prev.token_type], 4153 this=this, 4154 comments=self._prev_comments, 4155 expression=parse_method(), 4156 ) 4157 if isinstance(this, exp.Div): 4158 this.args["typed"] = self.dialect.TYPED_DIVISION 4159 this.args["safe"] = self.dialect.SAFE_DIVISION 4160 4161 return this 4162 4163 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4164 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4165 4166 def _parse_unary(self) -> t.Optional[exp.Expression]: 4167 if self._match_set(self.UNARY_PARSERS): 4168 return self.UNARY_PARSERS[self._prev.token_type](self) 4169 return self._parse_at_time_zone(self._parse_type()) 4170 4171 def _parse_type( 4172 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4173 ) -> t.Optional[exp.Expression]: 4174 interval = parse_interval and self._parse_interval() 4175 if interval: 4176 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4177 while True: 4178 index = self._index 4179 self._match(TokenType.PLUS) 4180 4181 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4182 self._retreat(index) 4183 break 4184 4185 interval = self.expression( # type: ignore 4186 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4187 ) 4188 4189 return interval 4190 4191 index = self._index 4192 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4193 this = self._parse_column() 4194 4195 if data_type: 4196 if isinstance(this, exp.Literal): 4197 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4198 if parser: 4199 return parser(self, this, data_type) 4200 return self.expression(exp.Cast, this=this, to=data_type) 4201 4202 if not data_type.expressions: 4203 self._retreat(index) 4204 return self._parse_id_var() if fallback_to_identifier else self._parse_column() 4205 4206 return self._parse_column_ops(data_type) 4207 4208 return this and self._parse_column_ops(this) 4209 4210 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4211 this = self._parse_type() 4212 if not this: 4213 return None 4214 4215 if isinstance(this, exp.Column) and not this.table: 4216 this = exp.var(this.name.upper()) 4217 4218 return self.expression( 4219 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4220 ) 4221 4222 def _parse_types( 4223 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4224 ) -> t.Optional[exp.Expression]: 4225 index = self._index 4226 4227 this: t.Optional[exp.Expression] = None 4228 prefix = self._match_text_seq("SYSUDTLIB", ".") 4229 4230 if not self._match_set(self.TYPE_TOKENS): 4231 identifier = allow_identifiers and self._parse_id_var( 4232 any_token=False, tokens=(TokenType.VAR,) 4233 ) 4234 if identifier: 4235 tokens = self.dialect.tokenize(identifier.name) 4236 4237 if len(tokens) != 1: 4238 self.raise_error("Unexpected identifier", self._prev) 4239 4240 if tokens[0].token_type in self.TYPE_TOKENS: 4241 self._prev = tokens[0] 4242 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4243 type_name = identifier.name 4244 4245 while self._match(TokenType.DOT): 4246 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4247 4248 this = exp.DataType.build(type_name, udt=True) 4249 else: 4250 self._retreat(self._index - 1) 4251 return None 4252 else: 4253 return None 4254 4255 type_token = self._prev.token_type 4256 4257 if type_token == TokenType.PSEUDO_TYPE: 4258 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4259 4260 if type_token == TokenType.OBJECT_IDENTIFIER: 4261 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4262 4263 nested = type_token in self.NESTED_TYPE_TOKENS 4264 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4265 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4266 expressions = None 4267 maybe_func = False 4268 4269 if self._match(TokenType.L_PAREN): 4270 if is_struct: 4271 expressions = self._parse_csv(self._parse_struct_types) 4272 elif nested: 4273 expressions = self._parse_csv( 4274 lambda: self._parse_types( 4275 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4276 ) 4277 ) 4278 elif type_token in self.ENUM_TYPE_TOKENS: 4279 expressions = self._parse_csv(self._parse_equality) 4280 elif is_aggregate: 4281 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4282 any_token=False, tokens=(TokenType.VAR,) 4283 ) 4284 if not func_or_ident or not self._match(TokenType.COMMA): 4285 return None 4286 expressions = self._parse_csv( 4287 lambda: self._parse_types( 4288 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4289 ) 4290 ) 4291 expressions.insert(0, func_or_ident) 4292 else: 4293 expressions = self._parse_csv(self._parse_type_size) 4294 4295 if not expressions or not self._match(TokenType.R_PAREN): 4296 self._retreat(index) 4297 return None 4298 4299 maybe_func = True 4300 4301 values: t.Optional[t.List[exp.Expression]] = None 4302 4303 if nested and self._match(TokenType.LT): 4304 if is_struct: 4305 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4306 else: 4307 expressions = self._parse_csv( 4308 lambda: self._parse_types( 4309 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4310 ) 4311 ) 4312 4313 if not self._match(TokenType.GT): 4314 self.raise_error("Expecting >") 4315 4316 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4317 values = self._parse_csv(self._parse_conjunction) 4318 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4319 4320 if type_token in self.TIMESTAMPS: 4321 if self._match_text_seq("WITH", "TIME", "ZONE"): 4322 maybe_func = False 4323 tz_type = ( 4324 exp.DataType.Type.TIMETZ 4325 if type_token in self.TIMES 4326 else exp.DataType.Type.TIMESTAMPTZ 4327 ) 4328 this = exp.DataType(this=tz_type, expressions=expressions) 4329 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4330 maybe_func = False 4331 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4332 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4333 maybe_func = False 4334 elif type_token == TokenType.INTERVAL: 4335 unit = self._parse_var(upper=True) 4336 if unit: 4337 if self._match_text_seq("TO"): 4338 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4339 4340 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4341 else: 4342 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4343 4344 if maybe_func and check_func: 4345 index2 = self._index 4346 peek = self._parse_string() 4347 4348 if not peek: 4349 self._retreat(index) 4350 return None 4351 4352 self._retreat(index2) 4353 4354 if not this: 4355 if self._match_text_seq("UNSIGNED"): 4356 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4357 if not unsigned_type_token: 4358 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4359 4360 type_token = unsigned_type_token or type_token 4361 4362 this = exp.DataType( 4363 this=exp.DataType.Type[type_token.value], 4364 expressions=expressions, 4365 nested=nested, 4366 values=values, 4367 prefix=prefix, 4368 ) 4369 elif expressions: 4370 this.set("expressions", expressions) 4371 4372 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 4373 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 4374 4375 if self.TYPE_CONVERTER and isinstance(this.this, exp.DataType.Type): 4376 converter = self.TYPE_CONVERTER.get(this.this) 4377 if converter: 4378 this = converter(t.cast(exp.DataType, this)) 4379 4380 return this 4381 4382 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4383 index = self._index 4384 this = ( 4385 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4386 or self._parse_id_var() 4387 ) 4388 self._match(TokenType.COLON) 4389 column_def = self._parse_column_def(this) 4390 4391 if type_required and ( 4392 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 4393 ): 4394 self._retreat(index) 4395 return self._parse_types() 4396 4397 return column_def 4398 4399 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4400 if not self._match_text_seq("AT", "TIME", "ZONE"): 4401 return this 4402 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4403 4404 def _parse_column(self) -> t.Optional[exp.Expression]: 4405 this = self._parse_column_reference() 4406 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4407 4408 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4409 this = self._parse_field() 4410 if ( 4411 not this 4412 and self._match(TokenType.VALUES, advance=False) 4413 and self.VALUES_FOLLOWED_BY_PAREN 4414 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4415 ): 4416 this = self._parse_id_var() 4417 4418 if isinstance(this, exp.Identifier): 4419 # We bubble up comments from the Identifier to the Column 4420 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4421 4422 return this 4423 4424 def _parse_colon_as_json_extract( 4425 self, this: t.Optional[exp.Expression] 4426 ) -> t.Optional[exp.Expression]: 4427 casts = [] 4428 json_path = [] 4429 4430 while self._match(TokenType.COLON): 4431 start_index = self._index 4432 path = self._parse_column_ops(self._parse_field(any_token=True)) 4433 4434 # The cast :: operator has a lower precedence than the extraction operator :, so 4435 # we rearrange the AST appropriately to avoid casting the JSON path 4436 while isinstance(path, exp.Cast): 4437 casts.append(path.to) 4438 path = path.this 4439 4440 if casts: 4441 dcolon_offset = next( 4442 i 4443 for i, t in enumerate(self._tokens[start_index:]) 4444 if t.token_type == TokenType.DCOLON 4445 ) 4446 end_token = self._tokens[start_index + dcolon_offset - 1] 4447 else: 4448 end_token = self._prev 4449 4450 if path: 4451 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4452 4453 if json_path: 4454 this = self.expression( 4455 exp.JSONExtract, 4456 this=this, 4457 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4458 ) 4459 4460 while casts: 4461 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4462 4463 return this 4464 4465 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4466 this = self._parse_bracket(this) 4467 4468 while self._match_set(self.COLUMN_OPERATORS): 4469 op_token = self._prev.token_type 4470 op = self.COLUMN_OPERATORS.get(op_token) 4471 4472 if op_token == TokenType.DCOLON: 4473 field = self._parse_types() 4474 if not field: 4475 self.raise_error("Expected type") 4476 elif op and self._curr: 4477 field = self._parse_column_reference() 4478 else: 4479 field = self._parse_field(any_token=True, anonymous_func=True) 4480 4481 if isinstance(field, exp.Func) and this: 4482 # bigquery allows function calls like x.y.count(...) 4483 # SAFE.SUBSTR(...) 4484 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4485 this = exp.replace_tree( 4486 this, 4487 lambda n: ( 4488 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4489 if n.table 4490 else n.this 4491 ) 4492 if isinstance(n, exp.Column) 4493 else n, 4494 ) 4495 4496 if op: 4497 this = op(self, this, field) 4498 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4499 this = self.expression( 4500 exp.Column, 4501 this=field, 4502 table=this.this, 4503 db=this.args.get("table"), 4504 catalog=this.args.get("db"), 4505 ) 4506 else: 4507 this = self.expression(exp.Dot, this=this, expression=field) 4508 4509 this = self._parse_bracket(this) 4510 4511 return self._parse_colon_as_json_extract(this) if self.COLON_IS_JSON_EXTRACT else this 4512 4513 def _parse_primary(self) -> t.Optional[exp.Expression]: 4514 if self._match_set(self.PRIMARY_PARSERS): 4515 token_type = self._prev.token_type 4516 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4517 4518 if token_type == TokenType.STRING: 4519 expressions = [primary] 4520 while self._match(TokenType.STRING): 4521 expressions.append(exp.Literal.string(self._prev.text)) 4522 4523 if len(expressions) > 1: 4524 return self.expression(exp.Concat, expressions=expressions) 4525 4526 return primary 4527 4528 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4529 return exp.Literal.number(f"0.{self._prev.text}") 4530 4531 if self._match(TokenType.L_PAREN): 4532 comments = self._prev_comments 4533 query = self._parse_select() 4534 4535 if query: 4536 expressions = [query] 4537 else: 4538 expressions = self._parse_expressions() 4539 4540 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4541 4542 if not this and self._match(TokenType.R_PAREN, advance=False): 4543 this = self.expression(exp.Tuple) 4544 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4545 this = self._parse_subquery(this=this, parse_alias=False) 4546 elif isinstance(this, exp.Subquery): 4547 this = self._parse_subquery( 4548 this=self._parse_set_operations(this), parse_alias=False 4549 ) 4550 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4551 this = self.expression(exp.Tuple, expressions=expressions) 4552 else: 4553 this = self.expression(exp.Paren, this=this) 4554 4555 if this: 4556 this.add_comments(comments) 4557 4558 self._match_r_paren(expression=this) 4559 return this 4560 4561 return None 4562 4563 def _parse_field( 4564 self, 4565 any_token: bool = False, 4566 tokens: t.Optional[t.Collection[TokenType]] = None, 4567 anonymous_func: bool = False, 4568 ) -> t.Optional[exp.Expression]: 4569 if anonymous_func: 4570 field = ( 4571 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4572 or self._parse_primary() 4573 ) 4574 else: 4575 field = self._parse_primary() or self._parse_function( 4576 anonymous=anonymous_func, any_token=any_token 4577 ) 4578 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4579 4580 def _parse_function( 4581 self, 4582 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4583 anonymous: bool = False, 4584 optional_parens: bool = True, 4585 any_token: bool = False, 4586 ) -> t.Optional[exp.Expression]: 4587 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4588 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4589 fn_syntax = False 4590 if ( 4591 self._match(TokenType.L_BRACE, advance=False) 4592 and self._next 4593 and self._next.text.upper() == "FN" 4594 ): 4595 self._advance(2) 4596 fn_syntax = True 4597 4598 func = self._parse_function_call( 4599 functions=functions, 4600 anonymous=anonymous, 4601 optional_parens=optional_parens, 4602 any_token=any_token, 4603 ) 4604 4605 if fn_syntax: 4606 self._match(TokenType.R_BRACE) 4607 4608 return func 4609 4610 def _parse_function_call( 4611 self, 4612 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4613 anonymous: bool = False, 4614 optional_parens: bool = True, 4615 any_token: bool = False, 4616 ) -> t.Optional[exp.Expression]: 4617 if not self._curr: 4618 return None 4619 4620 comments = self._curr.comments 4621 token_type = self._curr.token_type 4622 this = self._curr.text 4623 upper = this.upper() 4624 4625 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4626 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4627 self._advance() 4628 return self._parse_window(parser(self)) 4629 4630 if not self._next or self._next.token_type != TokenType.L_PAREN: 4631 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4632 self._advance() 4633 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4634 4635 return None 4636 4637 if any_token: 4638 if token_type in self.RESERVED_TOKENS: 4639 return None 4640 elif token_type not in self.FUNC_TOKENS: 4641 return None 4642 4643 self._advance(2) 4644 4645 parser = self.FUNCTION_PARSERS.get(upper) 4646 if parser and not anonymous: 4647 this = parser(self) 4648 else: 4649 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4650 4651 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4652 this = self.expression(subquery_predicate, this=self._parse_select()) 4653 self._match_r_paren() 4654 return this 4655 4656 if functions is None: 4657 functions = self.FUNCTIONS 4658 4659 function = functions.get(upper) 4660 4661 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4662 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4663 4664 if alias: 4665 args = self._kv_to_prop_eq(args) 4666 4667 if function and not anonymous: 4668 if "dialect" in function.__code__.co_varnames: 4669 func = function(args, dialect=self.dialect) 4670 else: 4671 func = function(args) 4672 4673 func = self.validate_expression(func, args) 4674 if not self.dialect.NORMALIZE_FUNCTIONS: 4675 func.meta["name"] = this 4676 4677 this = func 4678 else: 4679 if token_type == TokenType.IDENTIFIER: 4680 this = exp.Identifier(this=this, quoted=True) 4681 this = self.expression(exp.Anonymous, this=this, expressions=args) 4682 4683 if isinstance(this, exp.Expression): 4684 this.add_comments(comments) 4685 4686 self._match_r_paren(this) 4687 return self._parse_window(this) 4688 4689 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4690 transformed = [] 4691 4692 for e in expressions: 4693 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4694 if isinstance(e, exp.Alias): 4695 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4696 4697 if not isinstance(e, exp.PropertyEQ): 4698 e = self.expression( 4699 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4700 ) 4701 4702 if isinstance(e.this, exp.Column): 4703 e.this.replace(e.this.this) 4704 4705 transformed.append(e) 4706 4707 return transformed 4708 4709 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4710 return self._parse_column_def(self._parse_id_var()) 4711 4712 def _parse_user_defined_function( 4713 self, kind: t.Optional[TokenType] = None 4714 ) -> t.Optional[exp.Expression]: 4715 this = self._parse_id_var() 4716 4717 while self._match(TokenType.DOT): 4718 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4719 4720 if not self._match(TokenType.L_PAREN): 4721 return this 4722 4723 expressions = self._parse_csv(self._parse_function_parameter) 4724 self._match_r_paren() 4725 return self.expression( 4726 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4727 ) 4728 4729 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4730 literal = self._parse_primary() 4731 if literal: 4732 return self.expression(exp.Introducer, this=token.text, expression=literal) 4733 4734 return self.expression(exp.Identifier, this=token.text) 4735 4736 def _parse_session_parameter(self) -> exp.SessionParameter: 4737 kind = None 4738 this = self._parse_id_var() or self._parse_primary() 4739 4740 if this and self._match(TokenType.DOT): 4741 kind = this.name 4742 this = self._parse_var() or self._parse_primary() 4743 4744 return self.expression(exp.SessionParameter, this=this, kind=kind) 4745 4746 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 4747 return self._parse_id_var() 4748 4749 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4750 index = self._index 4751 4752 if self._match(TokenType.L_PAREN): 4753 expressions = t.cast( 4754 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 4755 ) 4756 4757 if not self._match(TokenType.R_PAREN): 4758 self._retreat(index) 4759 else: 4760 expressions = [self._parse_lambda_arg()] 4761 4762 if self._match_set(self.LAMBDAS): 4763 return self.LAMBDAS[self._prev.token_type](self, expressions) 4764 4765 self._retreat(index) 4766 4767 this: t.Optional[exp.Expression] 4768 4769 if self._match(TokenType.DISTINCT): 4770 this = self.expression( 4771 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4772 ) 4773 else: 4774 this = self._parse_select_or_expression(alias=alias) 4775 4776 return self._parse_limit( 4777 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4778 ) 4779 4780 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4781 index = self._index 4782 if not self._match(TokenType.L_PAREN): 4783 return this 4784 4785 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 4786 # expr can be of both types 4787 if self._match_set(self.SELECT_START_TOKENS): 4788 self._retreat(index) 4789 return this 4790 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4791 self._match_r_paren() 4792 return self.expression(exp.Schema, this=this, expressions=args) 4793 4794 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4795 return self._parse_column_def(self._parse_field(any_token=True)) 4796 4797 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4798 # column defs are not really columns, they're identifiers 4799 if isinstance(this, exp.Column): 4800 this = this.this 4801 4802 kind = self._parse_types(schema=True) 4803 4804 if self._match_text_seq("FOR", "ORDINALITY"): 4805 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4806 4807 constraints: t.List[exp.Expression] = [] 4808 4809 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4810 ("ALIAS", "MATERIALIZED") 4811 ): 4812 persisted = self._prev.text.upper() == "MATERIALIZED" 4813 constraints.append( 4814 self.expression( 4815 exp.ComputedColumnConstraint, 4816 this=self._parse_conjunction(), 4817 persisted=persisted or self._match_text_seq("PERSISTED"), 4818 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4819 ) 4820 ) 4821 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4822 self._match(TokenType.ALIAS) 4823 constraints.append( 4824 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4825 ) 4826 4827 while True: 4828 constraint = self._parse_column_constraint() 4829 if not constraint: 4830 break 4831 constraints.append(constraint) 4832 4833 if not kind and not constraints: 4834 return this 4835 4836 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4837 4838 def _parse_auto_increment( 4839 self, 4840 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4841 start = None 4842 increment = None 4843 4844 if self._match(TokenType.L_PAREN, advance=False): 4845 args = self._parse_wrapped_csv(self._parse_bitwise) 4846 start = seq_get(args, 0) 4847 increment = seq_get(args, 1) 4848 elif self._match_text_seq("START"): 4849 start = self._parse_bitwise() 4850 self._match_text_seq("INCREMENT") 4851 increment = self._parse_bitwise() 4852 4853 if start and increment: 4854 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4855 4856 return exp.AutoIncrementColumnConstraint() 4857 4858 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4859 if not self._match_text_seq("REFRESH"): 4860 self._retreat(self._index - 1) 4861 return None 4862 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4863 4864 def _parse_compress(self) -> exp.CompressColumnConstraint: 4865 if self._match(TokenType.L_PAREN, advance=False): 4866 return self.expression( 4867 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4868 ) 4869 4870 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4871 4872 def _parse_generated_as_identity( 4873 self, 4874 ) -> ( 4875 exp.GeneratedAsIdentityColumnConstraint 4876 | exp.ComputedColumnConstraint 4877 | exp.GeneratedAsRowColumnConstraint 4878 ): 4879 if self._match_text_seq("BY", "DEFAULT"): 4880 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4881 this = self.expression( 4882 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4883 ) 4884 else: 4885 self._match_text_seq("ALWAYS") 4886 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4887 4888 self._match(TokenType.ALIAS) 4889 4890 if self._match_text_seq("ROW"): 4891 start = self._match_text_seq("START") 4892 if not start: 4893 self._match(TokenType.END) 4894 hidden = self._match_text_seq("HIDDEN") 4895 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4896 4897 identity = self._match_text_seq("IDENTITY") 4898 4899 if self._match(TokenType.L_PAREN): 4900 if self._match(TokenType.START_WITH): 4901 this.set("start", self._parse_bitwise()) 4902 if self._match_text_seq("INCREMENT", "BY"): 4903 this.set("increment", self._parse_bitwise()) 4904 if self._match_text_seq("MINVALUE"): 4905 this.set("minvalue", self._parse_bitwise()) 4906 if self._match_text_seq("MAXVALUE"): 4907 this.set("maxvalue", self._parse_bitwise()) 4908 4909 if self._match_text_seq("CYCLE"): 4910 this.set("cycle", True) 4911 elif self._match_text_seq("NO", "CYCLE"): 4912 this.set("cycle", False) 4913 4914 if not identity: 4915 this.set("expression", self._parse_range()) 4916 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4917 args = self._parse_csv(self._parse_bitwise) 4918 this.set("start", seq_get(args, 0)) 4919 this.set("increment", seq_get(args, 1)) 4920 4921 self._match_r_paren() 4922 4923 return this 4924 4925 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4926 self._match_text_seq("LENGTH") 4927 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4928 4929 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4930 if self._match_text_seq("NULL"): 4931 return self.expression(exp.NotNullColumnConstraint) 4932 if self._match_text_seq("CASESPECIFIC"): 4933 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4934 if self._match_text_seq("FOR", "REPLICATION"): 4935 return self.expression(exp.NotForReplicationColumnConstraint) 4936 return None 4937 4938 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4939 if self._match(TokenType.CONSTRAINT): 4940 this = self._parse_id_var() 4941 else: 4942 this = None 4943 4944 if self._match_texts(self.CONSTRAINT_PARSERS): 4945 return self.expression( 4946 exp.ColumnConstraint, 4947 this=this, 4948 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4949 ) 4950 4951 return this 4952 4953 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4954 if not self._match(TokenType.CONSTRAINT): 4955 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4956 4957 return self.expression( 4958 exp.Constraint, 4959 this=self._parse_id_var(), 4960 expressions=self._parse_unnamed_constraints(), 4961 ) 4962 4963 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 4964 constraints = [] 4965 while True: 4966 constraint = self._parse_unnamed_constraint() or self._parse_function() 4967 if not constraint: 4968 break 4969 constraints.append(constraint) 4970 4971 return constraints 4972 4973 def _parse_unnamed_constraint( 4974 self, constraints: t.Optional[t.Collection[str]] = None 4975 ) -> t.Optional[exp.Expression]: 4976 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4977 constraints or self.CONSTRAINT_PARSERS 4978 ): 4979 return None 4980 4981 constraint = self._prev.text.upper() 4982 if constraint not in self.CONSTRAINT_PARSERS: 4983 self.raise_error(f"No parser found for schema constraint {constraint}.") 4984 4985 return self.CONSTRAINT_PARSERS[constraint](self) 4986 4987 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4988 self._match_text_seq("KEY") 4989 return self.expression( 4990 exp.UniqueColumnConstraint, 4991 this=self._parse_schema(self._parse_id_var(any_token=False)), 4992 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4993 on_conflict=self._parse_on_conflict(), 4994 ) 4995 4996 def _parse_key_constraint_options(self) -> t.List[str]: 4997 options = [] 4998 while True: 4999 if not self._curr: 5000 break 5001 5002 if self._match(TokenType.ON): 5003 action = None 5004 on = self._advance_any() and self._prev.text 5005 5006 if self._match_text_seq("NO", "ACTION"): 5007 action = "NO ACTION" 5008 elif self._match_text_seq("CASCADE"): 5009 action = "CASCADE" 5010 elif self._match_text_seq("RESTRICT"): 5011 action = "RESTRICT" 5012 elif self._match_pair(TokenType.SET, TokenType.NULL): 5013 action = "SET NULL" 5014 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5015 action = "SET DEFAULT" 5016 else: 5017 self.raise_error("Invalid key constraint") 5018 5019 options.append(f"ON {on} {action}") 5020 elif self._match_text_seq("NOT", "ENFORCED"): 5021 options.append("NOT ENFORCED") 5022 elif self._match_text_seq("DEFERRABLE"): 5023 options.append("DEFERRABLE") 5024 elif self._match_text_seq("INITIALLY", "DEFERRED"): 5025 options.append("INITIALLY DEFERRED") 5026 elif self._match_text_seq("NORELY"): 5027 options.append("NORELY") 5028 elif self._match_text_seq("MATCH", "FULL"): 5029 options.append("MATCH FULL") 5030 else: 5031 break 5032 5033 return options 5034 5035 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5036 if match and not self._match(TokenType.REFERENCES): 5037 return None 5038 5039 expressions = None 5040 this = self._parse_table(schema=True) 5041 options = self._parse_key_constraint_options() 5042 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5043 5044 def _parse_foreign_key(self) -> exp.ForeignKey: 5045 expressions = self._parse_wrapped_id_vars() 5046 reference = self._parse_references() 5047 options = {} 5048 5049 while self._match(TokenType.ON): 5050 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5051 self.raise_error("Expected DELETE or UPDATE") 5052 5053 kind = self._prev.text.lower() 5054 5055 if self._match_text_seq("NO", "ACTION"): 5056 action = "NO ACTION" 5057 elif self._match(TokenType.SET): 5058 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5059 action = "SET " + self._prev.text.upper() 5060 else: 5061 self._advance() 5062 action = self._prev.text.upper() 5063 5064 options[kind] = action 5065 5066 return self.expression( 5067 exp.ForeignKey, 5068 expressions=expressions, 5069 reference=reference, 5070 **options, # type: ignore 5071 ) 5072 5073 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5074 return self._parse_field() 5075 5076 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5077 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5078 self._retreat(self._index - 1) 5079 return None 5080 5081 id_vars = self._parse_wrapped_id_vars() 5082 return self.expression( 5083 exp.PeriodForSystemTimeConstraint, 5084 this=seq_get(id_vars, 0), 5085 expression=seq_get(id_vars, 1), 5086 ) 5087 5088 def _parse_primary_key( 5089 self, wrapped_optional: bool = False, in_props: bool = False 5090 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5091 desc = ( 5092 self._match_set((TokenType.ASC, TokenType.DESC)) 5093 and self._prev.token_type == TokenType.DESC 5094 ) 5095 5096 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5097 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5098 5099 expressions = self._parse_wrapped_csv( 5100 self._parse_primary_key_part, optional=wrapped_optional 5101 ) 5102 options = self._parse_key_constraint_options() 5103 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5104 5105 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5106 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 5107 5108 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5109 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5110 return this 5111 5112 bracket_kind = self._prev.token_type 5113 expressions = self._parse_csv( 5114 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5115 ) 5116 5117 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5118 self.raise_error("Expected ]") 5119 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5120 self.raise_error("Expected }") 5121 5122 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5123 if bracket_kind == TokenType.L_BRACE: 5124 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5125 elif not this or this.name.upper() == "ARRAY": 5126 this = self.expression(exp.Array, expressions=expressions) 5127 else: 5128 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5129 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5130 5131 self._add_comments(this) 5132 return self._parse_bracket(this) 5133 5134 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5135 if self._match(TokenType.COLON): 5136 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 5137 return this 5138 5139 def _parse_case(self) -> t.Optional[exp.Expression]: 5140 ifs = [] 5141 default = None 5142 5143 comments = self._prev_comments 5144 expression = self._parse_conjunction() 5145 5146 while self._match(TokenType.WHEN): 5147 this = self._parse_conjunction() 5148 self._match(TokenType.THEN) 5149 then = self._parse_conjunction() 5150 ifs.append(self.expression(exp.If, this=this, true=then)) 5151 5152 if self._match(TokenType.ELSE): 5153 default = self._parse_conjunction() 5154 5155 if not self._match(TokenType.END): 5156 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5157 default = exp.column("interval") 5158 else: 5159 self.raise_error("Expected END after CASE", self._prev) 5160 5161 return self.expression( 5162 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5163 ) 5164 5165 def _parse_if(self) -> t.Optional[exp.Expression]: 5166 if self._match(TokenType.L_PAREN): 5167 args = self._parse_csv(self._parse_conjunction) 5168 this = self.validate_expression(exp.If.from_arg_list(args), args) 5169 self._match_r_paren() 5170 else: 5171 index = self._index - 1 5172 5173 if self.NO_PAREN_IF_COMMANDS and index == 0: 5174 return self._parse_as_command(self._prev) 5175 5176 condition = self._parse_conjunction() 5177 5178 if not condition: 5179 self._retreat(index) 5180 return None 5181 5182 self._match(TokenType.THEN) 5183 true = self._parse_conjunction() 5184 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 5185 self._match(TokenType.END) 5186 this = self.expression(exp.If, this=condition, true=true, false=false) 5187 5188 return this 5189 5190 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5191 if not self._match_text_seq("VALUE", "FOR"): 5192 self._retreat(self._index - 1) 5193 return None 5194 5195 return self.expression( 5196 exp.NextValueFor, 5197 this=self._parse_column(), 5198 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5199 ) 5200 5201 def _parse_extract(self) -> exp.Extract: 5202 this = self._parse_function() or self._parse_var() or self._parse_type() 5203 5204 if self._match(TokenType.FROM): 5205 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5206 5207 if not self._match(TokenType.COMMA): 5208 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5209 5210 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5211 5212 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5213 this = self._parse_conjunction() 5214 5215 if not self._match(TokenType.ALIAS): 5216 if self._match(TokenType.COMMA): 5217 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5218 5219 self.raise_error("Expected AS after CAST") 5220 5221 fmt = None 5222 to = self._parse_types() 5223 5224 if self._match(TokenType.FORMAT): 5225 fmt_string = self._parse_string() 5226 fmt = self._parse_at_time_zone(fmt_string) 5227 5228 if not to: 5229 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5230 if to.this in exp.DataType.TEMPORAL_TYPES: 5231 this = self.expression( 5232 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5233 this=this, 5234 format=exp.Literal.string( 5235 format_time( 5236 fmt_string.this if fmt_string else "", 5237 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5238 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5239 ) 5240 ), 5241 ) 5242 5243 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5244 this.set("zone", fmt.args["zone"]) 5245 return this 5246 elif not to: 5247 self.raise_error("Expected TYPE after CAST") 5248 elif isinstance(to, exp.Identifier): 5249 to = exp.DataType.build(to.name, udt=True) 5250 elif to.this == exp.DataType.Type.CHAR: 5251 if self._match(TokenType.CHARACTER_SET): 5252 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5253 5254 return self.expression( 5255 exp.Cast if strict else exp.TryCast, 5256 this=this, 5257 to=to, 5258 format=fmt, 5259 safe=safe, 5260 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5261 ) 5262 5263 def _parse_string_agg(self) -> exp.Expression: 5264 if self._match(TokenType.DISTINCT): 5265 args: t.List[t.Optional[exp.Expression]] = [ 5266 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 5267 ] 5268 if self._match(TokenType.COMMA): 5269 args.extend(self._parse_csv(self._parse_conjunction)) 5270 else: 5271 args = self._parse_csv(self._parse_conjunction) # type: ignore 5272 5273 index = self._index 5274 if not self._match(TokenType.R_PAREN) and args: 5275 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5276 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5277 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5278 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5279 5280 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5281 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5282 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5283 if not self._match_text_seq("WITHIN", "GROUP"): 5284 self._retreat(index) 5285 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5286 5287 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5288 order = self._parse_order(this=seq_get(args, 0)) 5289 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5290 5291 def _parse_convert( 5292 self, strict: bool, safe: t.Optional[bool] = None 5293 ) -> t.Optional[exp.Expression]: 5294 this = self._parse_bitwise() 5295 5296 if self._match(TokenType.USING): 5297 to: t.Optional[exp.Expression] = self.expression( 5298 exp.CharacterSet, this=self._parse_var() 5299 ) 5300 elif self._match(TokenType.COMMA): 5301 to = self._parse_types() 5302 else: 5303 to = None 5304 5305 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5306 5307 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5308 """ 5309 There are generally two variants of the DECODE function: 5310 5311 - DECODE(bin, charset) 5312 - DECODE(expression, search, result [, search, result] ... [, default]) 5313 5314 The second variant will always be parsed into a CASE expression. Note that NULL 5315 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5316 instead of relying on pattern matching. 5317 """ 5318 args = self._parse_csv(self._parse_conjunction) 5319 5320 if len(args) < 3: 5321 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5322 5323 expression, *expressions = args 5324 if not expression: 5325 return None 5326 5327 ifs = [] 5328 for search, result in zip(expressions[::2], expressions[1::2]): 5329 if not search or not result: 5330 return None 5331 5332 if isinstance(search, exp.Literal): 5333 ifs.append( 5334 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5335 ) 5336 elif isinstance(search, exp.Null): 5337 ifs.append( 5338 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5339 ) 5340 else: 5341 cond = exp.or_( 5342 exp.EQ(this=expression.copy(), expression=search), 5343 exp.and_( 5344 exp.Is(this=expression.copy(), expression=exp.Null()), 5345 exp.Is(this=search.copy(), expression=exp.Null()), 5346 copy=False, 5347 ), 5348 copy=False, 5349 ) 5350 ifs.append(exp.If(this=cond, true=result)) 5351 5352 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5353 5354 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5355 self._match_text_seq("KEY") 5356 key = self._parse_column() 5357 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5358 self._match_text_seq("VALUE") 5359 value = self._parse_bitwise() 5360 5361 if not key and not value: 5362 return None 5363 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5364 5365 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5366 if not this or not self._match_text_seq("FORMAT", "JSON"): 5367 return this 5368 5369 return self.expression(exp.FormatJson, this=this) 5370 5371 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5372 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5373 for value in values: 5374 if self._match_text_seq(value, "ON", on): 5375 return f"{value} ON {on}" 5376 5377 return None 5378 5379 @t.overload 5380 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5381 5382 @t.overload 5383 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 5384 5385 def _parse_json_object(self, agg=False): 5386 star = self._parse_star() 5387 expressions = ( 5388 [star] 5389 if star 5390 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5391 ) 5392 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5393 5394 unique_keys = None 5395 if self._match_text_seq("WITH", "UNIQUE"): 5396 unique_keys = True 5397 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5398 unique_keys = False 5399 5400 self._match_text_seq("KEYS") 5401 5402 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5403 self._parse_type() 5404 ) 5405 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5406 5407 return self.expression( 5408 exp.JSONObjectAgg if agg else exp.JSONObject, 5409 expressions=expressions, 5410 null_handling=null_handling, 5411 unique_keys=unique_keys, 5412 return_type=return_type, 5413 encoding=encoding, 5414 ) 5415 5416 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5417 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5418 if not self._match_text_seq("NESTED"): 5419 this = self._parse_id_var() 5420 kind = self._parse_types(allow_identifiers=False) 5421 nested = None 5422 else: 5423 this = None 5424 kind = None 5425 nested = True 5426 5427 path = self._match_text_seq("PATH") and self._parse_string() 5428 nested_schema = nested and self._parse_json_schema() 5429 5430 return self.expression( 5431 exp.JSONColumnDef, 5432 this=this, 5433 kind=kind, 5434 path=path, 5435 nested_schema=nested_schema, 5436 ) 5437 5438 def _parse_json_schema(self) -> exp.JSONSchema: 5439 self._match_text_seq("COLUMNS") 5440 return self.expression( 5441 exp.JSONSchema, 5442 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5443 ) 5444 5445 def _parse_json_table(self) -> exp.JSONTable: 5446 this = self._parse_format_json(self._parse_bitwise()) 5447 path = self._match(TokenType.COMMA) and self._parse_string() 5448 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5449 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5450 schema = self._parse_json_schema() 5451 5452 return exp.JSONTable( 5453 this=this, 5454 schema=schema, 5455 path=path, 5456 error_handling=error_handling, 5457 empty_handling=empty_handling, 5458 ) 5459 5460 def _parse_match_against(self) -> exp.MatchAgainst: 5461 expressions = self._parse_csv(self._parse_column) 5462 5463 self._match_text_seq(")", "AGAINST", "(") 5464 5465 this = self._parse_string() 5466 5467 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5468 modifier = "IN NATURAL LANGUAGE MODE" 5469 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5470 modifier = f"{modifier} WITH QUERY EXPANSION" 5471 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5472 modifier = "IN BOOLEAN MODE" 5473 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5474 modifier = "WITH QUERY EXPANSION" 5475 else: 5476 modifier = None 5477 5478 return self.expression( 5479 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5480 ) 5481 5482 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5483 def _parse_open_json(self) -> exp.OpenJSON: 5484 this = self._parse_bitwise() 5485 path = self._match(TokenType.COMMA) and self._parse_string() 5486 5487 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5488 this = self._parse_field(any_token=True) 5489 kind = self._parse_types() 5490 path = self._parse_string() 5491 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5492 5493 return self.expression( 5494 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5495 ) 5496 5497 expressions = None 5498 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5499 self._match_l_paren() 5500 expressions = self._parse_csv(_parse_open_json_column_def) 5501 5502 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5503 5504 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5505 args = self._parse_csv(self._parse_bitwise) 5506 5507 if self._match(TokenType.IN): 5508 return self.expression( 5509 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5510 ) 5511 5512 if haystack_first: 5513 haystack = seq_get(args, 0) 5514 needle = seq_get(args, 1) 5515 else: 5516 needle = seq_get(args, 0) 5517 haystack = seq_get(args, 1) 5518 5519 return self.expression( 5520 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5521 ) 5522 5523 def _parse_predict(self) -> exp.Predict: 5524 self._match_text_seq("MODEL") 5525 this = self._parse_table() 5526 5527 self._match(TokenType.COMMA) 5528 self._match_text_seq("TABLE") 5529 5530 return self.expression( 5531 exp.Predict, 5532 this=this, 5533 expression=self._parse_table(), 5534 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5535 ) 5536 5537 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5538 args = self._parse_csv(self._parse_table) 5539 return exp.JoinHint(this=func_name.upper(), expressions=args) 5540 5541 def _parse_substring(self) -> exp.Substring: 5542 # Postgres supports the form: substring(string [from int] [for int]) 5543 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5544 5545 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5546 5547 if self._match(TokenType.FROM): 5548 args.append(self._parse_bitwise()) 5549 if self._match(TokenType.FOR): 5550 if len(args) == 1: 5551 args.append(exp.Literal.number(1)) 5552 args.append(self._parse_bitwise()) 5553 5554 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5555 5556 def _parse_trim(self) -> exp.Trim: 5557 # https://www.w3resource.com/sql/character-functions/trim.php 5558 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5559 5560 position = None 5561 collation = None 5562 expression = None 5563 5564 if self._match_texts(self.TRIM_TYPES): 5565 position = self._prev.text.upper() 5566 5567 this = self._parse_bitwise() 5568 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5569 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5570 expression = self._parse_bitwise() 5571 5572 if invert_order: 5573 this, expression = expression, this 5574 5575 if self._match(TokenType.COLLATE): 5576 collation = self._parse_bitwise() 5577 5578 return self.expression( 5579 exp.Trim, this=this, position=position, expression=expression, collation=collation 5580 ) 5581 5582 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5583 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5584 5585 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5586 return self._parse_window(self._parse_id_var(), alias=True) 5587 5588 def _parse_respect_or_ignore_nulls( 5589 self, this: t.Optional[exp.Expression] 5590 ) -> t.Optional[exp.Expression]: 5591 if self._match_text_seq("IGNORE", "NULLS"): 5592 return self.expression(exp.IgnoreNulls, this=this) 5593 if self._match_text_seq("RESPECT", "NULLS"): 5594 return self.expression(exp.RespectNulls, this=this) 5595 return this 5596 5597 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5598 if self._match(TokenType.HAVING): 5599 self._match_texts(("MAX", "MIN")) 5600 max = self._prev.text.upper() != "MIN" 5601 return self.expression( 5602 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5603 ) 5604 5605 return this 5606 5607 def _parse_window( 5608 self, this: t.Optional[exp.Expression], alias: bool = False 5609 ) -> t.Optional[exp.Expression]: 5610 func = this 5611 comments = func.comments if isinstance(func, exp.Expression) else None 5612 5613 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5614 self._match(TokenType.WHERE) 5615 this = self.expression( 5616 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5617 ) 5618 self._match_r_paren() 5619 5620 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5621 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5622 if self._match_text_seq("WITHIN", "GROUP"): 5623 order = self._parse_wrapped(self._parse_order) 5624 this = self.expression(exp.WithinGroup, this=this, expression=order) 5625 5626 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5627 # Some dialects choose to implement and some do not. 5628 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5629 5630 # There is some code above in _parse_lambda that handles 5631 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5632 5633 # The below changes handle 5634 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5635 5636 # Oracle allows both formats 5637 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5638 # and Snowflake chose to do the same for familiarity 5639 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5640 if isinstance(this, exp.AggFunc): 5641 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5642 5643 if ignore_respect and ignore_respect is not this: 5644 ignore_respect.replace(ignore_respect.this) 5645 this = self.expression(ignore_respect.__class__, this=this) 5646 5647 this = self._parse_respect_or_ignore_nulls(this) 5648 5649 # bigquery select from window x AS (partition by ...) 5650 if alias: 5651 over = None 5652 self._match(TokenType.ALIAS) 5653 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5654 return this 5655 else: 5656 over = self._prev.text.upper() 5657 5658 if comments and isinstance(func, exp.Expression): 5659 func.pop_comments() 5660 5661 if not self._match(TokenType.L_PAREN): 5662 return self.expression( 5663 exp.Window, 5664 comments=comments, 5665 this=this, 5666 alias=self._parse_id_var(False), 5667 over=over, 5668 ) 5669 5670 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5671 5672 first = self._match(TokenType.FIRST) 5673 if self._match_text_seq("LAST"): 5674 first = False 5675 5676 partition, order = self._parse_partition_and_order() 5677 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5678 5679 if kind: 5680 self._match(TokenType.BETWEEN) 5681 start = self._parse_window_spec() 5682 self._match(TokenType.AND) 5683 end = self._parse_window_spec() 5684 5685 spec = self.expression( 5686 exp.WindowSpec, 5687 kind=kind, 5688 start=start["value"], 5689 start_side=start["side"], 5690 end=end["value"], 5691 end_side=end["side"], 5692 ) 5693 else: 5694 spec = None 5695 5696 self._match_r_paren() 5697 5698 window = self.expression( 5699 exp.Window, 5700 comments=comments, 5701 this=this, 5702 partition_by=partition, 5703 order=order, 5704 spec=spec, 5705 alias=window_alias, 5706 over=over, 5707 first=first, 5708 ) 5709 5710 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5711 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5712 return self._parse_window(window, alias=alias) 5713 5714 return window 5715 5716 def _parse_partition_and_order( 5717 self, 5718 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5719 return self._parse_partition_by(), self._parse_order() 5720 5721 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5722 self._match(TokenType.BETWEEN) 5723 5724 return { 5725 "value": ( 5726 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5727 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5728 or self._parse_bitwise() 5729 ), 5730 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5731 } 5732 5733 def _parse_alias( 5734 self, this: t.Optional[exp.Expression], explicit: bool = False 5735 ) -> t.Optional[exp.Expression]: 5736 any_token = self._match(TokenType.ALIAS) 5737 comments = self._prev_comments or [] 5738 5739 if explicit and not any_token: 5740 return this 5741 5742 if self._match(TokenType.L_PAREN): 5743 aliases = self.expression( 5744 exp.Aliases, 5745 comments=comments, 5746 this=this, 5747 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5748 ) 5749 self._match_r_paren(aliases) 5750 return aliases 5751 5752 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5753 self.STRING_ALIASES and self._parse_string_as_identifier() 5754 ) 5755 5756 if alias: 5757 comments.extend(alias.pop_comments()) 5758 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5759 column = this.this 5760 5761 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5762 if not this.comments and column and column.comments: 5763 this.comments = column.pop_comments() 5764 5765 return this 5766 5767 def _parse_id_var( 5768 self, 5769 any_token: bool = True, 5770 tokens: t.Optional[t.Collection[TokenType]] = None, 5771 ) -> t.Optional[exp.Expression]: 5772 expression = self._parse_identifier() 5773 if not expression and ( 5774 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5775 ): 5776 quoted = self._prev.token_type == TokenType.STRING 5777 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5778 5779 return expression 5780 5781 def _parse_string(self) -> t.Optional[exp.Expression]: 5782 if self._match_set(self.STRING_PARSERS): 5783 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5784 return self._parse_placeholder() 5785 5786 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5787 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5788 5789 def _parse_number(self) -> t.Optional[exp.Expression]: 5790 if self._match_set(self.NUMERIC_PARSERS): 5791 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5792 return self._parse_placeholder() 5793 5794 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5795 if self._match(TokenType.IDENTIFIER): 5796 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5797 return self._parse_placeholder() 5798 5799 def _parse_var( 5800 self, 5801 any_token: bool = False, 5802 tokens: t.Optional[t.Collection[TokenType]] = None, 5803 upper: bool = False, 5804 ) -> t.Optional[exp.Expression]: 5805 if ( 5806 (any_token and self._advance_any()) 5807 or self._match(TokenType.VAR) 5808 or (self._match_set(tokens) if tokens else False) 5809 ): 5810 return self.expression( 5811 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5812 ) 5813 return self._parse_placeholder() 5814 5815 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5816 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5817 self._advance() 5818 return self._prev 5819 return None 5820 5821 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5822 return self._parse_var() or self._parse_string() 5823 5824 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5825 return self._parse_primary() or self._parse_var(any_token=True) 5826 5827 def _parse_null(self) -> t.Optional[exp.Expression]: 5828 if self._match_set(self.NULL_TOKENS): 5829 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5830 return self._parse_placeholder() 5831 5832 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5833 if self._match(TokenType.TRUE): 5834 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5835 if self._match(TokenType.FALSE): 5836 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5837 return self._parse_placeholder() 5838 5839 def _parse_star(self) -> t.Optional[exp.Expression]: 5840 if self._match(TokenType.STAR): 5841 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5842 return self._parse_placeholder() 5843 5844 def _parse_parameter(self) -> exp.Parameter: 5845 this = self._parse_identifier() or self._parse_primary_or_var() 5846 return self.expression(exp.Parameter, this=this) 5847 5848 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5849 if self._match_set(self.PLACEHOLDER_PARSERS): 5850 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5851 if placeholder: 5852 return placeholder 5853 self._advance(-1) 5854 return None 5855 5856 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 5857 if not self._match_texts(keywords): 5858 return None 5859 if self._match(TokenType.L_PAREN, advance=False): 5860 return self._parse_wrapped_csv(self._parse_expression) 5861 5862 expression = self._parse_expression() 5863 return [expression] if expression else None 5864 5865 def _parse_csv( 5866 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5867 ) -> t.List[exp.Expression]: 5868 parse_result = parse_method() 5869 items = [parse_result] if parse_result is not None else [] 5870 5871 while self._match(sep): 5872 self._add_comments(parse_result) 5873 parse_result = parse_method() 5874 if parse_result is not None: 5875 items.append(parse_result) 5876 5877 return items 5878 5879 def _parse_tokens( 5880 self, parse_method: t.Callable, expressions: t.Dict 5881 ) -> t.Optional[exp.Expression]: 5882 this = parse_method() 5883 5884 while self._match_set(expressions): 5885 this = self.expression( 5886 expressions[self._prev.token_type], 5887 this=this, 5888 comments=self._prev_comments, 5889 expression=parse_method(), 5890 ) 5891 5892 return this 5893 5894 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5895 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5896 5897 def _parse_wrapped_csv( 5898 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5899 ) -> t.List[exp.Expression]: 5900 return self._parse_wrapped( 5901 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5902 ) 5903 5904 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5905 wrapped = self._match(TokenType.L_PAREN) 5906 if not wrapped and not optional: 5907 self.raise_error("Expecting (") 5908 parse_result = parse_method() 5909 if wrapped: 5910 self._match_r_paren() 5911 return parse_result 5912 5913 def _parse_expressions(self) -> t.List[exp.Expression]: 5914 return self._parse_csv(self._parse_expression) 5915 5916 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5917 return self._parse_select() or self._parse_set_operations( 5918 self._parse_expression() if alias else self._parse_conjunction() 5919 ) 5920 5921 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5922 return self._parse_query_modifiers( 5923 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5924 ) 5925 5926 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5927 this = None 5928 if self._match_texts(self.TRANSACTION_KIND): 5929 this = self._prev.text 5930 5931 self._match_texts(("TRANSACTION", "WORK")) 5932 5933 modes = [] 5934 while True: 5935 mode = [] 5936 while self._match(TokenType.VAR): 5937 mode.append(self._prev.text) 5938 5939 if mode: 5940 modes.append(" ".join(mode)) 5941 if not self._match(TokenType.COMMA): 5942 break 5943 5944 return self.expression(exp.Transaction, this=this, modes=modes) 5945 5946 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5947 chain = None 5948 savepoint = None 5949 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5950 5951 self._match_texts(("TRANSACTION", "WORK")) 5952 5953 if self._match_text_seq("TO"): 5954 self._match_text_seq("SAVEPOINT") 5955 savepoint = self._parse_id_var() 5956 5957 if self._match(TokenType.AND): 5958 chain = not self._match_text_seq("NO") 5959 self._match_text_seq("CHAIN") 5960 5961 if is_rollback: 5962 return self.expression(exp.Rollback, savepoint=savepoint) 5963 5964 return self.expression(exp.Commit, chain=chain) 5965 5966 def _parse_refresh(self) -> exp.Refresh: 5967 self._match(TokenType.TABLE) 5968 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5969 5970 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5971 if not self._match_text_seq("ADD"): 5972 return None 5973 5974 self._match(TokenType.COLUMN) 5975 exists_column = self._parse_exists(not_=True) 5976 expression = self._parse_field_def() 5977 5978 if expression: 5979 expression.set("exists", exists_column) 5980 5981 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5982 if self._match_texts(("FIRST", "AFTER")): 5983 position = self._prev.text 5984 column_position = self.expression( 5985 exp.ColumnPosition, this=self._parse_column(), position=position 5986 ) 5987 expression.set("position", column_position) 5988 5989 return expression 5990 5991 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5992 drop = self._match(TokenType.DROP) and self._parse_drop() 5993 if drop and not isinstance(drop, exp.Command): 5994 drop.set("kind", drop.args.get("kind", "COLUMN")) 5995 return drop 5996 5997 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5998 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5999 return self.expression( 6000 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6001 ) 6002 6003 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6004 index = self._index - 1 6005 6006 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6007 return self._parse_csv( 6008 lambda: self.expression( 6009 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6010 ) 6011 ) 6012 6013 self._retreat(index) 6014 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6015 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6016 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6017 6018 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6019 if self._match_texts(self.ALTER_ALTER_PARSERS): 6020 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6021 6022 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6023 # keyword after ALTER we default to parsing this statement 6024 self._match(TokenType.COLUMN) 6025 column = self._parse_field(any_token=True) 6026 6027 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6028 return self.expression(exp.AlterColumn, this=column, drop=True) 6029 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6030 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 6031 if self._match(TokenType.COMMENT): 6032 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6033 6034 self._match_text_seq("SET", "DATA") 6035 self._match_text_seq("TYPE") 6036 return self.expression( 6037 exp.AlterColumn, 6038 this=column, 6039 dtype=self._parse_types(), 6040 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6041 using=self._match(TokenType.USING) and self._parse_conjunction(), 6042 ) 6043 6044 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6045 if self._match_texts(("ALL", "EVEN", "AUTO")): 6046 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6047 6048 self._match_text_seq("KEY", "DISTKEY") 6049 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6050 6051 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6052 if compound: 6053 self._match_text_seq("SORTKEY") 6054 6055 if self._match(TokenType.L_PAREN, advance=False): 6056 return self.expression( 6057 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6058 ) 6059 6060 self._match_texts(("AUTO", "NONE")) 6061 return self.expression( 6062 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6063 ) 6064 6065 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6066 index = self._index - 1 6067 6068 partition_exists = self._parse_exists() 6069 if self._match(TokenType.PARTITION, advance=False): 6070 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6071 6072 self._retreat(index) 6073 return self._parse_csv(self._parse_drop_column) 6074 6075 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6076 if self._match(TokenType.COLUMN): 6077 exists = self._parse_exists() 6078 old_column = self._parse_column() 6079 to = self._match_text_seq("TO") 6080 new_column = self._parse_column() 6081 6082 if old_column is None or to is None or new_column is None: 6083 return None 6084 6085 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6086 6087 self._match_text_seq("TO") 6088 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6089 6090 def _parse_alter_table_set(self) -> exp.AlterSet: 6091 alter_set = self.expression(exp.AlterSet) 6092 6093 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6094 "TABLE", "PROPERTIES" 6095 ): 6096 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_conjunction)) 6097 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6098 alter_set.set("expressions", [self._parse_conjunction()]) 6099 elif self._match_texts(("LOGGED", "UNLOGGED")): 6100 alter_set.set("option", exp.var(self._prev.text.upper())) 6101 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6102 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6103 elif self._match_text_seq("LOCATION"): 6104 alter_set.set("location", self._parse_field()) 6105 elif self._match_text_seq("ACCESS", "METHOD"): 6106 alter_set.set("access_method", self._parse_field()) 6107 elif self._match_text_seq("TABLESPACE"): 6108 alter_set.set("tablespace", self._parse_field()) 6109 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6110 alter_set.set("file_format", [self._parse_field()]) 6111 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6112 alter_set.set("file_format", self._parse_wrapped_options()) 6113 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6114 alter_set.set("copy_options", self._parse_wrapped_options()) 6115 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6116 alter_set.set("tag", self._parse_csv(self._parse_conjunction)) 6117 else: 6118 if self._match_text_seq("SERDE"): 6119 alter_set.set("serde", self._parse_field()) 6120 6121 alter_set.set("expressions", [self._parse_properties()]) 6122 6123 return alter_set 6124 6125 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6126 start = self._prev 6127 6128 if not self._match(TokenType.TABLE): 6129 return self._parse_as_command(start) 6130 6131 exists = self._parse_exists() 6132 only = self._match_text_seq("ONLY") 6133 this = self._parse_table(schema=True) 6134 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6135 6136 if self._next: 6137 self._advance() 6138 6139 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6140 if parser: 6141 actions = ensure_list(parser(self)) 6142 options = self._parse_csv(self._parse_property) 6143 6144 if not self._curr and actions: 6145 return self.expression( 6146 exp.AlterTable, 6147 this=this, 6148 exists=exists, 6149 actions=actions, 6150 only=only, 6151 options=options, 6152 cluster=cluster, 6153 ) 6154 6155 return self._parse_as_command(start) 6156 6157 def _parse_merge(self) -> exp.Merge: 6158 self._match(TokenType.INTO) 6159 target = self._parse_table() 6160 6161 if target and self._match(TokenType.ALIAS, advance=False): 6162 target.set("alias", self._parse_table_alias()) 6163 6164 self._match(TokenType.USING) 6165 using = self._parse_table() 6166 6167 self._match(TokenType.ON) 6168 on = self._parse_conjunction() 6169 6170 return self.expression( 6171 exp.Merge, 6172 this=target, 6173 using=using, 6174 on=on, 6175 expressions=self._parse_when_matched(), 6176 ) 6177 6178 def _parse_when_matched(self) -> t.List[exp.When]: 6179 whens = [] 6180 6181 while self._match(TokenType.WHEN): 6182 matched = not self._match(TokenType.NOT) 6183 self._match_text_seq("MATCHED") 6184 source = ( 6185 False 6186 if self._match_text_seq("BY", "TARGET") 6187 else self._match_text_seq("BY", "SOURCE") 6188 ) 6189 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 6190 6191 self._match(TokenType.THEN) 6192 6193 if self._match(TokenType.INSERT): 6194 _this = self._parse_star() 6195 if _this: 6196 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6197 else: 6198 then = self.expression( 6199 exp.Insert, 6200 this=self._parse_value(), 6201 expression=self._match_text_seq("VALUES") and self._parse_value(), 6202 ) 6203 elif self._match(TokenType.UPDATE): 6204 expressions = self._parse_star() 6205 if expressions: 6206 then = self.expression(exp.Update, expressions=expressions) 6207 else: 6208 then = self.expression( 6209 exp.Update, 6210 expressions=self._match(TokenType.SET) 6211 and self._parse_csv(self._parse_equality), 6212 ) 6213 elif self._match(TokenType.DELETE): 6214 then = self.expression(exp.Var, this=self._prev.text) 6215 else: 6216 then = None 6217 6218 whens.append( 6219 self.expression( 6220 exp.When, 6221 matched=matched, 6222 source=source, 6223 condition=condition, 6224 then=then, 6225 ) 6226 ) 6227 return whens 6228 6229 def _parse_show(self) -> t.Optional[exp.Expression]: 6230 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6231 if parser: 6232 return parser(self) 6233 return self._parse_as_command(self._prev) 6234 6235 def _parse_set_item_assignment( 6236 self, kind: t.Optional[str] = None 6237 ) -> t.Optional[exp.Expression]: 6238 index = self._index 6239 6240 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6241 return self._parse_set_transaction(global_=kind == "GLOBAL") 6242 6243 left = self._parse_primary() or self._parse_column() 6244 assignment_delimiter = self._match_texts(("=", "TO")) 6245 6246 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6247 self._retreat(index) 6248 return None 6249 6250 right = self._parse_statement() or self._parse_id_var() 6251 if isinstance(right, (exp.Column, exp.Identifier)): 6252 right = exp.var(right.name) 6253 6254 this = self.expression(exp.EQ, this=left, expression=right) 6255 return self.expression(exp.SetItem, this=this, kind=kind) 6256 6257 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6258 self._match_text_seq("TRANSACTION") 6259 characteristics = self._parse_csv( 6260 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6261 ) 6262 return self.expression( 6263 exp.SetItem, 6264 expressions=characteristics, 6265 kind="TRANSACTION", 6266 **{"global": global_}, # type: ignore 6267 ) 6268 6269 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6270 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6271 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6272 6273 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6274 index = self._index 6275 set_ = self.expression( 6276 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6277 ) 6278 6279 if self._curr: 6280 self._retreat(index) 6281 return self._parse_as_command(self._prev) 6282 6283 return set_ 6284 6285 def _parse_var_from_options( 6286 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6287 ) -> t.Optional[exp.Var]: 6288 start = self._curr 6289 if not start: 6290 return None 6291 6292 option = start.text.upper() 6293 continuations = options.get(option) 6294 6295 index = self._index 6296 self._advance() 6297 for keywords in continuations or []: 6298 if isinstance(keywords, str): 6299 keywords = (keywords,) 6300 6301 if self._match_text_seq(*keywords): 6302 option = f"{option} {' '.join(keywords)}" 6303 break 6304 else: 6305 if continuations or continuations is None: 6306 if raise_unmatched: 6307 self.raise_error(f"Unknown option {option}") 6308 6309 self._retreat(index) 6310 return None 6311 6312 return exp.var(option) 6313 6314 def _parse_as_command(self, start: Token) -> exp.Command: 6315 while self._curr: 6316 self._advance() 6317 text = self._find_sql(start, self._prev) 6318 size = len(start.text) 6319 self._warn_unsupported() 6320 return exp.Command(this=text[:size], expression=text[size:]) 6321 6322 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6323 settings = [] 6324 6325 self._match_l_paren() 6326 kind = self._parse_id_var() 6327 6328 if self._match(TokenType.L_PAREN): 6329 while True: 6330 key = self._parse_id_var() 6331 value = self._parse_primary() 6332 6333 if not key and value is None: 6334 break 6335 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6336 self._match(TokenType.R_PAREN) 6337 6338 self._match_r_paren() 6339 6340 return self.expression( 6341 exp.DictProperty, 6342 this=this, 6343 kind=kind.this if kind else None, 6344 settings=settings, 6345 ) 6346 6347 def _parse_dict_range(self, this: str) -> exp.DictRange: 6348 self._match_l_paren() 6349 has_min = self._match_text_seq("MIN") 6350 if has_min: 6351 min = self._parse_var() or self._parse_primary() 6352 self._match_text_seq("MAX") 6353 max = self._parse_var() or self._parse_primary() 6354 else: 6355 max = self._parse_var() or self._parse_primary() 6356 min = exp.Literal.number(0) 6357 self._match_r_paren() 6358 return self.expression(exp.DictRange, this=this, min=min, max=max) 6359 6360 def _parse_comprehension( 6361 self, this: t.Optional[exp.Expression] 6362 ) -> t.Optional[exp.Comprehension]: 6363 index = self._index 6364 expression = self._parse_column() 6365 if not self._match(TokenType.IN): 6366 self._retreat(index - 1) 6367 return None 6368 iterator = self._parse_column() 6369 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6370 return self.expression( 6371 exp.Comprehension, 6372 this=this, 6373 expression=expression, 6374 iterator=iterator, 6375 condition=condition, 6376 ) 6377 6378 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6379 if self._match(TokenType.HEREDOC_STRING): 6380 return self.expression(exp.Heredoc, this=self._prev.text) 6381 6382 if not self._match_text_seq("$"): 6383 return None 6384 6385 tags = ["$"] 6386 tag_text = None 6387 6388 if self._is_connected(): 6389 self._advance() 6390 tags.append(self._prev.text.upper()) 6391 else: 6392 self.raise_error("No closing $ found") 6393 6394 if tags[-1] != "$": 6395 if self._is_connected() and self._match_text_seq("$"): 6396 tag_text = tags[-1] 6397 tags.append("$") 6398 else: 6399 self.raise_error("No closing $ found") 6400 6401 heredoc_start = self._curr 6402 6403 while self._curr: 6404 if self._match_text_seq(*tags, advance=False): 6405 this = self._find_sql(heredoc_start, self._prev) 6406 self._advance(len(tags)) 6407 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6408 6409 self._advance() 6410 6411 self.raise_error(f"No closing {''.join(tags)} found") 6412 return None 6413 6414 def _find_parser( 6415 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6416 ) -> t.Optional[t.Callable]: 6417 if not self._curr: 6418 return None 6419 6420 index = self._index 6421 this = [] 6422 while True: 6423 # The current token might be multiple words 6424 curr = self._curr.text.upper() 6425 key = curr.split(" ") 6426 this.append(curr) 6427 6428 self._advance() 6429 result, trie = in_trie(trie, key) 6430 if result == TrieResult.FAILED: 6431 break 6432 6433 if result == TrieResult.EXISTS: 6434 subparser = parsers[" ".join(this)] 6435 return subparser 6436 6437 self._retreat(index) 6438 return None 6439 6440 def _match(self, token_type, advance=True, expression=None): 6441 if not self._curr: 6442 return None 6443 6444 if self._curr.token_type == token_type: 6445 if advance: 6446 self._advance() 6447 self._add_comments(expression) 6448 return True 6449 6450 return None 6451 6452 def _match_set(self, types, advance=True): 6453 if not self._curr: 6454 return None 6455 6456 if self._curr.token_type in types: 6457 if advance: 6458 self._advance() 6459 return True 6460 6461 return None 6462 6463 def _match_pair(self, token_type_a, token_type_b, advance=True): 6464 if not self._curr or not self._next: 6465 return None 6466 6467 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6468 if advance: 6469 self._advance(2) 6470 return True 6471 6472 return None 6473 6474 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6475 if not self._match(TokenType.L_PAREN, expression=expression): 6476 self.raise_error("Expecting (") 6477 6478 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6479 if not self._match(TokenType.R_PAREN, expression=expression): 6480 self.raise_error("Expecting )") 6481 6482 def _match_texts(self, texts, advance=True): 6483 if self._curr and self._curr.text.upper() in texts: 6484 if advance: 6485 self._advance() 6486 return True 6487 return None 6488 6489 def _match_text_seq(self, *texts, advance=True): 6490 index = self._index 6491 for text in texts: 6492 if self._curr and self._curr.text.upper() == text: 6493 self._advance() 6494 else: 6495 self._retreat(index) 6496 return None 6497 6498 if not advance: 6499 self._retreat(index) 6500 6501 return True 6502 6503 def _replace_lambda( 6504 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6505 ) -> t.Optional[exp.Expression]: 6506 if not node: 6507 return node 6508 6509 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6510 6511 for column in node.find_all(exp.Column): 6512 typ = lambda_types.get(column.parts[0].name) 6513 if typ is not None: 6514 dot_or_id = column.to_dot() if column.table else column.this 6515 6516 if typ: 6517 dot_or_id = self.expression( 6518 exp.Cast, 6519 this=dot_or_id, 6520 to=typ, 6521 ) 6522 6523 parent = column.parent 6524 6525 while isinstance(parent, exp.Dot): 6526 if not isinstance(parent.parent, exp.Dot): 6527 parent.replace(dot_or_id) 6528 break 6529 parent = parent.parent 6530 else: 6531 if column is node: 6532 node = dot_or_id 6533 else: 6534 column.replace(dot_or_id) 6535 return node 6536 6537 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6538 start = self._prev 6539 6540 # Not to be confused with TRUNCATE(number, decimals) function call 6541 if self._match(TokenType.L_PAREN): 6542 self._retreat(self._index - 2) 6543 return self._parse_function() 6544 6545 # Clickhouse supports TRUNCATE DATABASE as well 6546 is_database = self._match(TokenType.DATABASE) 6547 6548 self._match(TokenType.TABLE) 6549 6550 exists = self._parse_exists(not_=False) 6551 6552 expressions = self._parse_csv( 6553 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6554 ) 6555 6556 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6557 6558 if self._match_text_seq("RESTART", "IDENTITY"): 6559 identity = "RESTART" 6560 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6561 identity = "CONTINUE" 6562 else: 6563 identity = None 6564 6565 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6566 option = self._prev.text 6567 else: 6568 option = None 6569 6570 partition = self._parse_partition() 6571 6572 # Fallback case 6573 if self._curr: 6574 return self._parse_as_command(start) 6575 6576 return self.expression( 6577 exp.TruncateTable, 6578 expressions=expressions, 6579 is_database=is_database, 6580 exists=exists, 6581 cluster=cluster, 6582 identity=identity, 6583 option=option, 6584 partition=partition, 6585 ) 6586 6587 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6588 this = self._parse_ordered(self._parse_opclass) 6589 6590 if not self._match(TokenType.WITH): 6591 return this 6592 6593 op = self._parse_var(any_token=True) 6594 6595 return self.expression(exp.WithOperator, this=this, op=op) 6596 6597 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6598 opts = [] 6599 self._match(TokenType.EQ) 6600 self._match(TokenType.L_PAREN) 6601 while self._curr and not self._match(TokenType.R_PAREN): 6602 opts.append(self._parse_conjunction()) 6603 self._match(TokenType.COMMA) 6604 return opts 6605 6606 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6607 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 6608 6609 options = [] 6610 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6611 option = self._parse_unquoted_field() 6612 value = None 6613 6614 # Some options are defined as functions with the values as params 6615 if not isinstance(option, exp.Func): 6616 prev = self._prev.text.upper() 6617 # Different dialects might separate options and values by white space, "=" and "AS" 6618 self._match(TokenType.EQ) 6619 self._match(TokenType.ALIAS) 6620 6621 if prev == "FILE_FORMAT" and self._match(TokenType.L_PAREN): 6622 # Snowflake FILE_FORMAT case 6623 value = self._parse_wrapped_options() 6624 else: 6625 value = self._parse_unquoted_field() 6626 6627 param = self.expression(exp.CopyParameter, this=option, expression=value) 6628 options.append(param) 6629 6630 if sep: 6631 self._match(sep) 6632 6633 return options 6634 6635 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6636 expr = self.expression(exp.Credentials) 6637 6638 if self._match_text_seq("STORAGE_INTEGRATION", advance=False): 6639 expr.set("storage", self._parse_conjunction()) 6640 if self._match_text_seq("CREDENTIALS"): 6641 # Snowflake supports CREDENTIALS = (...), while Redshift CREDENTIALS <string> 6642 creds = ( 6643 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6644 ) 6645 expr.set("credentials", creds) 6646 if self._match_text_seq("ENCRYPTION"): 6647 expr.set("encryption", self._parse_wrapped_options()) 6648 if self._match_text_seq("IAM_ROLE"): 6649 expr.set("iam_role", self._parse_field()) 6650 if self._match_text_seq("REGION"): 6651 expr.set("region", self._parse_field()) 6652 6653 return expr 6654 6655 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6656 return self._parse_field() 6657 6658 def _parse_copy(self) -> exp.Copy | exp.Command: 6659 start = self._prev 6660 6661 self._match(TokenType.INTO) 6662 6663 this = ( 6664 self._parse_conjunction() 6665 if self._match(TokenType.L_PAREN, advance=False) 6666 else self._parse_table(schema=True) 6667 ) 6668 6669 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6670 6671 files = self._parse_csv(self._parse_file_location) 6672 credentials = self._parse_credentials() 6673 6674 self._match_text_seq("WITH") 6675 6676 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6677 6678 # Fallback case 6679 if self._curr: 6680 return self._parse_as_command(start) 6681 6682 return self.expression( 6683 exp.Copy, 6684 this=this, 6685 kind=kind, 6686 credentials=credentials, 6687 files=files, 6688 params=params, 6689 )
26def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 27 if len(args) == 1 and args[0].is_star: 28 return exp.StarMap(this=args[0]) 29 30 keys = [] 31 values = [] 32 for i in range(0, len(args), 2): 33 keys.append(args[i]) 34 values.append(args[i + 1]) 35 36 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
44def binary_range_parser( 45 expr_type: t.Type[exp.Expression], reverse_args: bool = False 46) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 47 def _parse_binary_range( 48 self: Parser, this: t.Optional[exp.Expression] 49 ) -> t.Optional[exp.Expression]: 50 expression = self._parse_bitwise() 51 if reverse_args: 52 this, expression = expression, this 53 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 54 55 return _parse_binary_range
58def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 59 # Default argument order is base, expression 60 this = seq_get(args, 0) 61 expression = seq_get(args, 1) 62 63 if expression: 64 if not dialect.LOG_BASE_FIRST: 65 this, expression = expression, this 66 return exp.Log(this=this, expression=expression) 67 68 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
88def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 89 def _builder(args: t.List, dialect: Dialect) -> E: 90 expression = expr_type( 91 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 92 ) 93 if len(args) > 2 and expr_type is exp.JSONExtract: 94 expression.set("expressions", args[2:]) 95 96 return expression 97 98 return _builder
101def build_mod(args: t.List) -> exp.Mod: 102 this = seq_get(args, 0) 103 expression = seq_get(args, 1) 104 105 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 106 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 107 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 108 109 return exp.Mod(this=this, expression=expression)
122class Parser(metaclass=_Parser): 123 """ 124 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 125 126 Args: 127 error_level: The desired error level. 128 Default: ErrorLevel.IMMEDIATE 129 error_message_context: The amount of context to capture from a query string when displaying 130 the error message (in number of characters). 131 Default: 100 132 max_errors: Maximum number of error messages to include in a raised ParseError. 133 This is only relevant if error_level is ErrorLevel.RAISE. 134 Default: 3 135 """ 136 137 FUNCTIONS: t.Dict[str, t.Callable] = { 138 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 139 "CONCAT": lambda args, dialect: exp.Concat( 140 expressions=args, 141 safe=not dialect.STRICT_STRING_CONCAT, 142 coalesce=dialect.CONCAT_COALESCE, 143 ), 144 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 145 expressions=args, 146 safe=not dialect.STRICT_STRING_CONCAT, 147 coalesce=dialect.CONCAT_COALESCE, 148 ), 149 "DATE_TO_DATE_STR": lambda args: exp.Cast( 150 this=seq_get(args, 0), 151 to=exp.DataType(this=exp.DataType.Type.TEXT), 152 ), 153 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 154 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 155 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 156 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 157 "LIKE": build_like, 158 "LOG": build_logarithm, 159 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 160 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 161 "MOD": build_mod, 162 "TIME_TO_TIME_STR": lambda args: exp.Cast( 163 this=seq_get(args, 0), 164 to=exp.DataType(this=exp.DataType.Type.TEXT), 165 ), 166 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 167 this=exp.Cast( 168 this=seq_get(args, 0), 169 to=exp.DataType(this=exp.DataType.Type.TEXT), 170 ), 171 start=exp.Literal.number(1), 172 length=exp.Literal.number(10), 173 ), 174 "VAR_MAP": build_var_map, 175 "LOWER": build_lower, 176 "UPPER": build_upper, 177 "HEX": build_hex, 178 "TO_HEX": build_hex, 179 } 180 181 NO_PAREN_FUNCTIONS = { 182 TokenType.CURRENT_DATE: exp.CurrentDate, 183 TokenType.CURRENT_DATETIME: exp.CurrentDate, 184 TokenType.CURRENT_TIME: exp.CurrentTime, 185 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 186 TokenType.CURRENT_USER: exp.CurrentUser, 187 } 188 189 STRUCT_TYPE_TOKENS = { 190 TokenType.NESTED, 191 TokenType.OBJECT, 192 TokenType.STRUCT, 193 } 194 195 NESTED_TYPE_TOKENS = { 196 TokenType.ARRAY, 197 TokenType.LOWCARDINALITY, 198 TokenType.MAP, 199 TokenType.NULLABLE, 200 *STRUCT_TYPE_TOKENS, 201 } 202 203 ENUM_TYPE_TOKENS = { 204 TokenType.ENUM, 205 TokenType.ENUM8, 206 TokenType.ENUM16, 207 } 208 209 AGGREGATE_TYPE_TOKENS = { 210 TokenType.AGGREGATEFUNCTION, 211 TokenType.SIMPLEAGGREGATEFUNCTION, 212 } 213 214 TYPE_TOKENS = { 215 TokenType.BIT, 216 TokenType.BOOLEAN, 217 TokenType.TINYINT, 218 TokenType.UTINYINT, 219 TokenType.SMALLINT, 220 TokenType.USMALLINT, 221 TokenType.INT, 222 TokenType.UINT, 223 TokenType.BIGINT, 224 TokenType.UBIGINT, 225 TokenType.INT128, 226 TokenType.UINT128, 227 TokenType.INT256, 228 TokenType.UINT256, 229 TokenType.MEDIUMINT, 230 TokenType.UMEDIUMINT, 231 TokenType.FIXEDSTRING, 232 TokenType.FLOAT, 233 TokenType.DOUBLE, 234 TokenType.CHAR, 235 TokenType.NCHAR, 236 TokenType.VARCHAR, 237 TokenType.NVARCHAR, 238 TokenType.BPCHAR, 239 TokenType.TEXT, 240 TokenType.MEDIUMTEXT, 241 TokenType.LONGTEXT, 242 TokenType.MEDIUMBLOB, 243 TokenType.LONGBLOB, 244 TokenType.BINARY, 245 TokenType.VARBINARY, 246 TokenType.JSON, 247 TokenType.JSONB, 248 TokenType.INTERVAL, 249 TokenType.TINYBLOB, 250 TokenType.TINYTEXT, 251 TokenType.TIME, 252 TokenType.TIMETZ, 253 TokenType.TIMESTAMP, 254 TokenType.TIMESTAMP_S, 255 TokenType.TIMESTAMP_MS, 256 TokenType.TIMESTAMP_NS, 257 TokenType.TIMESTAMPTZ, 258 TokenType.TIMESTAMPLTZ, 259 TokenType.TIMESTAMPNTZ, 260 TokenType.DATETIME, 261 TokenType.DATETIME64, 262 TokenType.DATE, 263 TokenType.DATE32, 264 TokenType.INT4RANGE, 265 TokenType.INT4MULTIRANGE, 266 TokenType.INT8RANGE, 267 TokenType.INT8MULTIRANGE, 268 TokenType.NUMRANGE, 269 TokenType.NUMMULTIRANGE, 270 TokenType.TSRANGE, 271 TokenType.TSMULTIRANGE, 272 TokenType.TSTZRANGE, 273 TokenType.TSTZMULTIRANGE, 274 TokenType.DATERANGE, 275 TokenType.DATEMULTIRANGE, 276 TokenType.DECIMAL, 277 TokenType.UDECIMAL, 278 TokenType.BIGDECIMAL, 279 TokenType.UUID, 280 TokenType.GEOGRAPHY, 281 TokenType.GEOMETRY, 282 TokenType.HLLSKETCH, 283 TokenType.HSTORE, 284 TokenType.PSEUDO_TYPE, 285 TokenType.SUPER, 286 TokenType.SERIAL, 287 TokenType.SMALLSERIAL, 288 TokenType.BIGSERIAL, 289 TokenType.XML, 290 TokenType.YEAR, 291 TokenType.UNIQUEIDENTIFIER, 292 TokenType.USERDEFINED, 293 TokenType.MONEY, 294 TokenType.SMALLMONEY, 295 TokenType.ROWVERSION, 296 TokenType.IMAGE, 297 TokenType.VARIANT, 298 TokenType.OBJECT, 299 TokenType.OBJECT_IDENTIFIER, 300 TokenType.INET, 301 TokenType.IPADDRESS, 302 TokenType.IPPREFIX, 303 TokenType.IPV4, 304 TokenType.IPV6, 305 TokenType.UNKNOWN, 306 TokenType.NULL, 307 TokenType.NAME, 308 TokenType.TDIGEST, 309 *ENUM_TYPE_TOKENS, 310 *NESTED_TYPE_TOKENS, 311 *AGGREGATE_TYPE_TOKENS, 312 } 313 314 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 315 TokenType.BIGINT: TokenType.UBIGINT, 316 TokenType.INT: TokenType.UINT, 317 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 318 TokenType.SMALLINT: TokenType.USMALLINT, 319 TokenType.TINYINT: TokenType.UTINYINT, 320 TokenType.DECIMAL: TokenType.UDECIMAL, 321 } 322 323 SUBQUERY_PREDICATES = { 324 TokenType.ANY: exp.Any, 325 TokenType.ALL: exp.All, 326 TokenType.EXISTS: exp.Exists, 327 TokenType.SOME: exp.Any, 328 } 329 330 RESERVED_TOKENS = { 331 *Tokenizer.SINGLE_TOKENS.values(), 332 TokenType.SELECT, 333 } - {TokenType.IDENTIFIER} 334 335 DB_CREATABLES = { 336 TokenType.DATABASE, 337 TokenType.DICTIONARY, 338 TokenType.MODEL, 339 TokenType.SCHEMA, 340 TokenType.SEQUENCE, 341 TokenType.STORAGE_INTEGRATION, 342 TokenType.TABLE, 343 TokenType.TAG, 344 TokenType.VIEW, 345 TokenType.WAREHOUSE, 346 TokenType.STREAMLIT, 347 } 348 349 CREATABLES = { 350 TokenType.COLUMN, 351 TokenType.CONSTRAINT, 352 TokenType.FOREIGN_KEY, 353 TokenType.FUNCTION, 354 TokenType.INDEX, 355 TokenType.PROCEDURE, 356 *DB_CREATABLES, 357 } 358 359 # Tokens that can represent identifiers 360 ID_VAR_TOKENS = { 361 TokenType.VAR, 362 TokenType.ANTI, 363 TokenType.APPLY, 364 TokenType.ASC, 365 TokenType.ASOF, 366 TokenType.AUTO_INCREMENT, 367 TokenType.BEGIN, 368 TokenType.BPCHAR, 369 TokenType.CACHE, 370 TokenType.CASE, 371 TokenType.COLLATE, 372 TokenType.COMMAND, 373 TokenType.COMMENT, 374 TokenType.COMMIT, 375 TokenType.CONSTRAINT, 376 TokenType.COPY, 377 TokenType.DEFAULT, 378 TokenType.DELETE, 379 TokenType.DESC, 380 TokenType.DESCRIBE, 381 TokenType.DICTIONARY, 382 TokenType.DIV, 383 TokenType.END, 384 TokenType.EXECUTE, 385 TokenType.ESCAPE, 386 TokenType.FALSE, 387 TokenType.FIRST, 388 TokenType.FILTER, 389 TokenType.FINAL, 390 TokenType.FORMAT, 391 TokenType.FULL, 392 TokenType.IDENTIFIER, 393 TokenType.IS, 394 TokenType.ISNULL, 395 TokenType.INTERVAL, 396 TokenType.KEEP, 397 TokenType.KILL, 398 TokenType.LEFT, 399 TokenType.LOAD, 400 TokenType.MERGE, 401 TokenType.NATURAL, 402 TokenType.NEXT, 403 TokenType.OFFSET, 404 TokenType.OPERATOR, 405 TokenType.ORDINALITY, 406 TokenType.OVERLAPS, 407 TokenType.OVERWRITE, 408 TokenType.PARTITION, 409 TokenType.PERCENT, 410 TokenType.PIVOT, 411 TokenType.PRAGMA, 412 TokenType.RANGE, 413 TokenType.RECURSIVE, 414 TokenType.REFERENCES, 415 TokenType.REFRESH, 416 TokenType.REPLACE, 417 TokenType.RIGHT, 418 TokenType.ROLLUP, 419 TokenType.ROW, 420 TokenType.ROWS, 421 TokenType.SEMI, 422 TokenType.SET, 423 TokenType.SETTINGS, 424 TokenType.SHOW, 425 TokenType.TEMPORARY, 426 TokenType.TOP, 427 TokenType.TRUE, 428 TokenType.TRUNCATE, 429 TokenType.UNIQUE, 430 TokenType.UNNEST, 431 TokenType.UNPIVOT, 432 TokenType.UPDATE, 433 TokenType.USE, 434 TokenType.VOLATILE, 435 TokenType.WINDOW, 436 *CREATABLES, 437 *SUBQUERY_PREDICATES, 438 *TYPE_TOKENS, 439 *NO_PAREN_FUNCTIONS, 440 } 441 442 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 443 444 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 445 TokenType.ANTI, 446 TokenType.APPLY, 447 TokenType.ASOF, 448 TokenType.FULL, 449 TokenType.LEFT, 450 TokenType.LOCK, 451 TokenType.NATURAL, 452 TokenType.OFFSET, 453 TokenType.RIGHT, 454 TokenType.SEMI, 455 TokenType.WINDOW, 456 } 457 458 ALIAS_TOKENS = ID_VAR_TOKENS 459 460 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 461 462 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 463 464 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 465 466 FUNC_TOKENS = { 467 TokenType.COLLATE, 468 TokenType.COMMAND, 469 TokenType.CURRENT_DATE, 470 TokenType.CURRENT_DATETIME, 471 TokenType.CURRENT_TIMESTAMP, 472 TokenType.CURRENT_TIME, 473 TokenType.CURRENT_USER, 474 TokenType.FILTER, 475 TokenType.FIRST, 476 TokenType.FORMAT, 477 TokenType.GLOB, 478 TokenType.IDENTIFIER, 479 TokenType.INDEX, 480 TokenType.ISNULL, 481 TokenType.ILIKE, 482 TokenType.INSERT, 483 TokenType.LIKE, 484 TokenType.MERGE, 485 TokenType.OFFSET, 486 TokenType.PRIMARY_KEY, 487 TokenType.RANGE, 488 TokenType.REPLACE, 489 TokenType.RLIKE, 490 TokenType.ROW, 491 TokenType.UNNEST, 492 TokenType.VAR, 493 TokenType.LEFT, 494 TokenType.RIGHT, 495 TokenType.SEQUENCE, 496 TokenType.DATE, 497 TokenType.DATETIME, 498 TokenType.TABLE, 499 TokenType.TIMESTAMP, 500 TokenType.TIMESTAMPTZ, 501 TokenType.TRUNCATE, 502 TokenType.WINDOW, 503 TokenType.XOR, 504 *TYPE_TOKENS, 505 *SUBQUERY_PREDICATES, 506 } 507 508 CONJUNCTION = { 509 TokenType.AND: exp.And, 510 TokenType.OR: exp.Or, 511 } 512 513 EQUALITY = { 514 TokenType.EQ: exp.EQ, 515 TokenType.NEQ: exp.NEQ, 516 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 517 } 518 519 COMPARISON = { 520 TokenType.GT: exp.GT, 521 TokenType.GTE: exp.GTE, 522 TokenType.LT: exp.LT, 523 TokenType.LTE: exp.LTE, 524 } 525 526 BITWISE = { 527 TokenType.AMP: exp.BitwiseAnd, 528 TokenType.CARET: exp.BitwiseXor, 529 TokenType.PIPE: exp.BitwiseOr, 530 } 531 532 TERM = { 533 TokenType.DASH: exp.Sub, 534 TokenType.PLUS: exp.Add, 535 TokenType.MOD: exp.Mod, 536 TokenType.COLLATE: exp.Collate, 537 } 538 539 FACTOR = { 540 TokenType.DIV: exp.IntDiv, 541 TokenType.LR_ARROW: exp.Distance, 542 TokenType.SLASH: exp.Div, 543 TokenType.STAR: exp.Mul, 544 } 545 546 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 547 548 TIMES = { 549 TokenType.TIME, 550 TokenType.TIMETZ, 551 } 552 553 TIMESTAMPS = { 554 TokenType.TIMESTAMP, 555 TokenType.TIMESTAMPTZ, 556 TokenType.TIMESTAMPLTZ, 557 *TIMES, 558 } 559 560 SET_OPERATIONS = { 561 TokenType.UNION, 562 TokenType.INTERSECT, 563 TokenType.EXCEPT, 564 } 565 566 JOIN_METHODS = { 567 TokenType.ASOF, 568 TokenType.NATURAL, 569 TokenType.POSITIONAL, 570 } 571 572 JOIN_SIDES = { 573 TokenType.LEFT, 574 TokenType.RIGHT, 575 TokenType.FULL, 576 } 577 578 JOIN_KINDS = { 579 TokenType.INNER, 580 TokenType.OUTER, 581 TokenType.CROSS, 582 TokenType.SEMI, 583 TokenType.ANTI, 584 } 585 586 JOIN_HINTS: t.Set[str] = set() 587 588 LAMBDAS = { 589 TokenType.ARROW: lambda self, expressions: self.expression( 590 exp.Lambda, 591 this=self._replace_lambda( 592 self._parse_conjunction(), 593 expressions, 594 ), 595 expressions=expressions, 596 ), 597 TokenType.FARROW: lambda self, expressions: self.expression( 598 exp.Kwarg, 599 this=exp.var(expressions[0].name), 600 expression=self._parse_conjunction(), 601 ), 602 } 603 604 COLUMN_OPERATORS = { 605 TokenType.DOT: None, 606 TokenType.DCOLON: lambda self, this, to: self.expression( 607 exp.Cast if self.STRICT_CAST else exp.TryCast, 608 this=this, 609 to=to, 610 ), 611 TokenType.ARROW: lambda self, this, path: self.expression( 612 exp.JSONExtract, 613 this=this, 614 expression=self.dialect.to_json_path(path), 615 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 616 ), 617 TokenType.DARROW: lambda self, this, path: self.expression( 618 exp.JSONExtractScalar, 619 this=this, 620 expression=self.dialect.to_json_path(path), 621 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 622 ), 623 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 624 exp.JSONBExtract, 625 this=this, 626 expression=path, 627 ), 628 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 629 exp.JSONBExtractScalar, 630 this=this, 631 expression=path, 632 ), 633 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 634 exp.JSONBContains, 635 this=this, 636 expression=key, 637 ), 638 } 639 640 EXPRESSION_PARSERS = { 641 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 642 exp.Column: lambda self: self._parse_column(), 643 exp.Condition: lambda self: self._parse_conjunction(), 644 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 645 exp.Expression: lambda self: self._parse_expression(), 646 exp.From: lambda self: self._parse_from(joins=True), 647 exp.Group: lambda self: self._parse_group(), 648 exp.Having: lambda self: self._parse_having(), 649 exp.Identifier: lambda self: self._parse_id_var(), 650 exp.Join: lambda self: self._parse_join(), 651 exp.Lambda: lambda self: self._parse_lambda(), 652 exp.Lateral: lambda self: self._parse_lateral(), 653 exp.Limit: lambda self: self._parse_limit(), 654 exp.Offset: lambda self: self._parse_offset(), 655 exp.Order: lambda self: self._parse_order(), 656 exp.Ordered: lambda self: self._parse_ordered(), 657 exp.Properties: lambda self: self._parse_properties(), 658 exp.Qualify: lambda self: self._parse_qualify(), 659 exp.Returning: lambda self: self._parse_returning(), 660 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 661 exp.Table: lambda self: self._parse_table_parts(), 662 exp.TableAlias: lambda self: self._parse_table_alias(), 663 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 664 exp.Where: lambda self: self._parse_where(), 665 exp.Window: lambda self: self._parse_named_window(), 666 exp.With: lambda self: self._parse_with(), 667 "JOIN_TYPE": lambda self: self._parse_join_parts(), 668 } 669 670 STATEMENT_PARSERS = { 671 TokenType.ALTER: lambda self: self._parse_alter(), 672 TokenType.BEGIN: lambda self: self._parse_transaction(), 673 TokenType.CACHE: lambda self: self._parse_cache(), 674 TokenType.COMMENT: lambda self: self._parse_comment(), 675 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 676 TokenType.COPY: lambda self: self._parse_copy(), 677 TokenType.CREATE: lambda self: self._parse_create(), 678 TokenType.DELETE: lambda self: self._parse_delete(), 679 TokenType.DESC: lambda self: self._parse_describe(), 680 TokenType.DESCRIBE: lambda self: self._parse_describe(), 681 TokenType.DROP: lambda self: self._parse_drop(), 682 TokenType.INSERT: lambda self: self._parse_insert(), 683 TokenType.KILL: lambda self: self._parse_kill(), 684 TokenType.LOAD: lambda self: self._parse_load(), 685 TokenType.MERGE: lambda self: self._parse_merge(), 686 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 687 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 688 TokenType.REFRESH: lambda self: self._parse_refresh(), 689 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 690 TokenType.SET: lambda self: self._parse_set(), 691 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 692 TokenType.UNCACHE: lambda self: self._parse_uncache(), 693 TokenType.UPDATE: lambda self: self._parse_update(), 694 TokenType.USE: lambda self: self.expression( 695 exp.Use, 696 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 697 this=self._parse_table(schema=False), 698 ), 699 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 700 } 701 702 UNARY_PARSERS = { 703 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 704 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 705 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 706 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 707 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 708 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 709 } 710 711 STRING_PARSERS = { 712 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 713 exp.RawString, this=token.text 714 ), 715 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 716 exp.National, this=token.text 717 ), 718 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 719 TokenType.STRING: lambda self, token: self.expression( 720 exp.Literal, this=token.text, is_string=True 721 ), 722 TokenType.UNICODE_STRING: lambda self, token: self.expression( 723 exp.UnicodeString, 724 this=token.text, 725 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 726 ), 727 } 728 729 NUMERIC_PARSERS = { 730 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 731 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 732 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 733 TokenType.NUMBER: lambda self, token: self.expression( 734 exp.Literal, this=token.text, is_string=False 735 ), 736 } 737 738 PRIMARY_PARSERS = { 739 **STRING_PARSERS, 740 **NUMERIC_PARSERS, 741 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 742 TokenType.NULL: lambda self, _: self.expression(exp.Null), 743 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 744 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 745 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 746 TokenType.STAR: lambda self, _: self.expression( 747 exp.Star, 748 **{ 749 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 750 "replace": self._parse_star_op("REPLACE"), 751 "rename": self._parse_star_op("RENAME"), 752 }, 753 ), 754 } 755 756 PLACEHOLDER_PARSERS = { 757 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 758 TokenType.PARAMETER: lambda self: self._parse_parameter(), 759 TokenType.COLON: lambda self: ( 760 self.expression(exp.Placeholder, this=self._prev.text) 761 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 762 else None 763 ), 764 } 765 766 RANGE_PARSERS = { 767 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 768 TokenType.GLOB: binary_range_parser(exp.Glob), 769 TokenType.ILIKE: binary_range_parser(exp.ILike), 770 TokenType.IN: lambda self, this: self._parse_in(this), 771 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 772 TokenType.IS: lambda self, this: self._parse_is(this), 773 TokenType.LIKE: binary_range_parser(exp.Like), 774 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 775 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 776 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 777 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 778 } 779 780 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 781 "ALLOWED_VALUES": lambda self: self.expression( 782 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 783 ), 784 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 785 "AUTO": lambda self: self._parse_auto_property(), 786 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 787 "BACKUP": lambda self: self.expression( 788 exp.BackupProperty, this=self._parse_var(any_token=True) 789 ), 790 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 791 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 792 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 793 "CHECKSUM": lambda self: self._parse_checksum(), 794 "CLUSTER BY": lambda self: self._parse_cluster(), 795 "CLUSTERED": lambda self: self._parse_clustered_by(), 796 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 797 exp.CollateProperty, **kwargs 798 ), 799 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 800 "CONTAINS": lambda self: self._parse_contains_property(), 801 "COPY": lambda self: self._parse_copy_property(), 802 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 803 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 804 "DEFINER": lambda self: self._parse_definer(), 805 "DETERMINISTIC": lambda self: self.expression( 806 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 807 ), 808 "DISTKEY": lambda self: self._parse_distkey(), 809 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 810 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 811 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 812 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 813 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 814 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 815 "FREESPACE": lambda self: self._parse_freespace(), 816 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 817 "HEAP": lambda self: self.expression(exp.HeapProperty), 818 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 819 "IMMUTABLE": lambda self: self.expression( 820 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 821 ), 822 "INHERITS": lambda self: self.expression( 823 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 824 ), 825 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 826 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 827 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 828 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 829 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 830 "LIKE": lambda self: self._parse_create_like(), 831 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 832 "LOCK": lambda self: self._parse_locking(), 833 "LOCKING": lambda self: self._parse_locking(), 834 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 835 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 836 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 837 "MODIFIES": lambda self: self._parse_modifies_property(), 838 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 839 "NO": lambda self: self._parse_no_property(), 840 "ON": lambda self: self._parse_on_property(), 841 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 842 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 843 "PARTITION": lambda self: self._parse_partitioned_of(), 844 "PARTITION BY": lambda self: self._parse_partitioned_by(), 845 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 846 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 847 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 848 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 849 "READS": lambda self: self._parse_reads_property(), 850 "REMOTE": lambda self: self._parse_remote_with_connection(), 851 "RETURNS": lambda self: self._parse_returns(), 852 "STRICT": lambda self: self.expression(exp.StrictProperty), 853 "ROW": lambda self: self._parse_row(), 854 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 855 "SAMPLE": lambda self: self.expression( 856 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 857 ), 858 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 859 "SETTINGS": lambda self: self.expression( 860 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 861 ), 862 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 863 "SORTKEY": lambda self: self._parse_sortkey(), 864 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 865 "STABLE": lambda self: self.expression( 866 exp.StabilityProperty, this=exp.Literal.string("STABLE") 867 ), 868 "STORED": lambda self: self._parse_stored(), 869 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 870 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 871 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 872 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 873 "TO": lambda self: self._parse_to_table(), 874 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 875 "TRANSFORM": lambda self: self.expression( 876 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 877 ), 878 "TTL": lambda self: self._parse_ttl(), 879 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 880 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 881 "VOLATILE": lambda self: self._parse_volatile_property(), 882 "WITH": lambda self: self._parse_with_property(), 883 } 884 885 CONSTRAINT_PARSERS = { 886 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 887 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 888 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 889 "CHARACTER SET": lambda self: self.expression( 890 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 891 ), 892 "CHECK": lambda self: self.expression( 893 exp.CheckColumnConstraint, 894 this=self._parse_wrapped(self._parse_conjunction), 895 enforced=self._match_text_seq("ENFORCED"), 896 ), 897 "COLLATE": lambda self: self.expression( 898 exp.CollateColumnConstraint, this=self._parse_var() 899 ), 900 "COMMENT": lambda self: self.expression( 901 exp.CommentColumnConstraint, this=self._parse_string() 902 ), 903 "COMPRESS": lambda self: self._parse_compress(), 904 "CLUSTERED": lambda self: self.expression( 905 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 906 ), 907 "NONCLUSTERED": lambda self: self.expression( 908 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 909 ), 910 "DEFAULT": lambda self: self.expression( 911 exp.DefaultColumnConstraint, this=self._parse_bitwise() 912 ), 913 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 914 "EPHEMERAL": lambda self: self.expression( 915 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 916 ), 917 "EXCLUDE": lambda self: self.expression( 918 exp.ExcludeColumnConstraint, this=self._parse_index_params() 919 ), 920 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 921 "FORMAT": lambda self: self.expression( 922 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 923 ), 924 "GENERATED": lambda self: self._parse_generated_as_identity(), 925 "IDENTITY": lambda self: self._parse_auto_increment(), 926 "INLINE": lambda self: self._parse_inline(), 927 "LIKE": lambda self: self._parse_create_like(), 928 "NOT": lambda self: self._parse_not_constraint(), 929 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 930 "ON": lambda self: ( 931 self._match(TokenType.UPDATE) 932 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 933 ) 934 or self.expression(exp.OnProperty, this=self._parse_id_var()), 935 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 936 "PERIOD": lambda self: self._parse_period_for_system_time(), 937 "PRIMARY KEY": lambda self: self._parse_primary_key(), 938 "REFERENCES": lambda self: self._parse_references(match=False), 939 "TITLE": lambda self: self.expression( 940 exp.TitleColumnConstraint, this=self._parse_var_or_string() 941 ), 942 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 943 "UNIQUE": lambda self: self._parse_unique(), 944 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 945 "WITH": lambda self: self.expression( 946 exp.Properties, expressions=self._parse_wrapped_properties() 947 ), 948 } 949 950 ALTER_PARSERS = { 951 "ADD": lambda self: self._parse_alter_table_add(), 952 "ALTER": lambda self: self._parse_alter_table_alter(), 953 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 954 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 955 "DROP": lambda self: self._parse_alter_table_drop(), 956 "RENAME": lambda self: self._parse_alter_table_rename(), 957 "SET": lambda self: self._parse_alter_table_set(), 958 } 959 960 ALTER_ALTER_PARSERS = { 961 "DISTKEY": lambda self: self._parse_alter_diststyle(), 962 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 963 "SORTKEY": lambda self: self._parse_alter_sortkey(), 964 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 965 } 966 967 SCHEMA_UNNAMED_CONSTRAINTS = { 968 "CHECK", 969 "EXCLUDE", 970 "FOREIGN KEY", 971 "LIKE", 972 "PERIOD", 973 "PRIMARY KEY", 974 "UNIQUE", 975 } 976 977 NO_PAREN_FUNCTION_PARSERS = { 978 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 979 "CASE": lambda self: self._parse_case(), 980 "IF": lambda self: self._parse_if(), 981 "NEXT": lambda self: self._parse_next_value_for(), 982 } 983 984 INVALID_FUNC_NAME_TOKENS = { 985 TokenType.IDENTIFIER, 986 TokenType.STRING, 987 } 988 989 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 990 991 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 992 993 FUNCTION_PARSERS = { 994 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 995 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 996 "DECODE": lambda self: self._parse_decode(), 997 "EXTRACT": lambda self: self._parse_extract(), 998 "JSON_OBJECT": lambda self: self._parse_json_object(), 999 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1000 "JSON_TABLE": lambda self: self._parse_json_table(), 1001 "MATCH": lambda self: self._parse_match_against(), 1002 "OPENJSON": lambda self: self._parse_open_json(), 1003 "POSITION": lambda self: self._parse_position(), 1004 "PREDICT": lambda self: self._parse_predict(), 1005 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1006 "STRING_AGG": lambda self: self._parse_string_agg(), 1007 "SUBSTRING": lambda self: self._parse_substring(), 1008 "TRIM": lambda self: self._parse_trim(), 1009 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1010 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1011 } 1012 1013 QUERY_MODIFIER_PARSERS = { 1014 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1015 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1016 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1017 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1018 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1019 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1020 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1021 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1022 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1023 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1024 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1025 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1026 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1027 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1028 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1029 TokenType.CLUSTER_BY: lambda self: ( 1030 "cluster", 1031 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1032 ), 1033 TokenType.DISTRIBUTE_BY: lambda self: ( 1034 "distribute", 1035 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1036 ), 1037 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1038 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1039 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1040 } 1041 1042 SET_PARSERS = { 1043 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1044 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1045 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1046 "TRANSACTION": lambda self: self._parse_set_transaction(), 1047 } 1048 1049 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1050 1051 TYPE_LITERAL_PARSERS = { 1052 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1053 } 1054 1055 TYPE_CONVERTER: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1056 1057 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1058 1059 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1060 1061 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1062 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1063 "ISOLATION": ( 1064 ("LEVEL", "REPEATABLE", "READ"), 1065 ("LEVEL", "READ", "COMMITTED"), 1066 ("LEVEL", "READ", "UNCOMITTED"), 1067 ("LEVEL", "SERIALIZABLE"), 1068 ), 1069 "READ": ("WRITE", "ONLY"), 1070 } 1071 1072 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1073 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1074 ) 1075 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1076 1077 CREATE_SEQUENCE: OPTIONS_TYPE = { 1078 "SCALE": ("EXTEND", "NOEXTEND"), 1079 "SHARD": ("EXTEND", "NOEXTEND"), 1080 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1081 **dict.fromkeys( 1082 ( 1083 "SESSION", 1084 "GLOBAL", 1085 "KEEP", 1086 "NOKEEP", 1087 "ORDER", 1088 "NOORDER", 1089 "NOCACHE", 1090 "CYCLE", 1091 "NOCYCLE", 1092 "NOMINVALUE", 1093 "NOMAXVALUE", 1094 "NOSCALE", 1095 "NOSHARD", 1096 ), 1097 tuple(), 1098 ), 1099 } 1100 1101 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1102 1103 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1104 1105 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1106 1107 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1108 1109 CLONE_KEYWORDS = {"CLONE", "COPY"} 1110 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1111 1112 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1113 1114 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1115 1116 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1117 1118 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1119 1120 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1121 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1122 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1123 1124 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1125 1126 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1127 1128 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 1129 1130 DISTINCT_TOKENS = {TokenType.DISTINCT} 1131 1132 NULL_TOKENS = {TokenType.NULL} 1133 1134 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1135 1136 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1137 1138 STRICT_CAST = True 1139 1140 PREFIXED_PIVOT_COLUMNS = False 1141 IDENTIFY_PIVOT_STRINGS = False 1142 1143 LOG_DEFAULTS_TO_LN = False 1144 1145 # Whether ADD is present for each column added by ALTER TABLE 1146 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1147 1148 # Whether the table sample clause expects CSV syntax 1149 TABLESAMPLE_CSV = False 1150 1151 # The default method used for table sampling 1152 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1153 1154 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1155 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1156 1157 # Whether the TRIM function expects the characters to trim as its first argument 1158 TRIM_PATTERN_FIRST = False 1159 1160 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1161 STRING_ALIASES = False 1162 1163 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1164 MODIFIERS_ATTACHED_TO_UNION = True 1165 UNION_MODIFIERS = {"order", "limit", "offset"} 1166 1167 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1168 NO_PAREN_IF_COMMANDS = True 1169 1170 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1171 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1172 1173 # Whether the `:` operator is used to extract a value from a JSON document 1174 COLON_IS_JSON_EXTRACT = False 1175 1176 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1177 # If this is True and '(' is not found, the keyword will be treated as an identifier 1178 VALUES_FOLLOWED_BY_PAREN = True 1179 1180 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1181 SUPPORTS_IMPLICIT_UNNEST = False 1182 1183 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1184 INTERVAL_SPANS = True 1185 1186 # Whether a PARTITION clause can follow a table reference 1187 SUPPORTS_PARTITION_SELECTION = False 1188 1189 __slots__ = ( 1190 "error_level", 1191 "error_message_context", 1192 "max_errors", 1193 "dialect", 1194 "sql", 1195 "errors", 1196 "_tokens", 1197 "_index", 1198 "_curr", 1199 "_next", 1200 "_prev", 1201 "_prev_comments", 1202 ) 1203 1204 # Autofilled 1205 SHOW_TRIE: t.Dict = {} 1206 SET_TRIE: t.Dict = {} 1207 1208 def __init__( 1209 self, 1210 error_level: t.Optional[ErrorLevel] = None, 1211 error_message_context: int = 100, 1212 max_errors: int = 3, 1213 dialect: DialectType = None, 1214 ): 1215 from sqlglot.dialects import Dialect 1216 1217 self.error_level = error_level or ErrorLevel.IMMEDIATE 1218 self.error_message_context = error_message_context 1219 self.max_errors = max_errors 1220 self.dialect = Dialect.get_or_raise(dialect) 1221 self.reset() 1222 1223 def reset(self): 1224 self.sql = "" 1225 self.errors = [] 1226 self._tokens = [] 1227 self._index = 0 1228 self._curr = None 1229 self._next = None 1230 self._prev = None 1231 self._prev_comments = None 1232 1233 def parse( 1234 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1235 ) -> t.List[t.Optional[exp.Expression]]: 1236 """ 1237 Parses a list of tokens and returns a list of syntax trees, one tree 1238 per parsed SQL statement. 1239 1240 Args: 1241 raw_tokens: The list of tokens. 1242 sql: The original SQL string, used to produce helpful debug messages. 1243 1244 Returns: 1245 The list of the produced syntax trees. 1246 """ 1247 return self._parse( 1248 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1249 ) 1250 1251 def parse_into( 1252 self, 1253 expression_types: exp.IntoType, 1254 raw_tokens: t.List[Token], 1255 sql: t.Optional[str] = None, 1256 ) -> t.List[t.Optional[exp.Expression]]: 1257 """ 1258 Parses a list of tokens into a given Expression type. If a collection of Expression 1259 types is given instead, this method will try to parse the token list into each one 1260 of them, stopping at the first for which the parsing succeeds. 1261 1262 Args: 1263 expression_types: The expression type(s) to try and parse the token list into. 1264 raw_tokens: The list of tokens. 1265 sql: The original SQL string, used to produce helpful debug messages. 1266 1267 Returns: 1268 The target Expression. 1269 """ 1270 errors = [] 1271 for expression_type in ensure_list(expression_types): 1272 parser = self.EXPRESSION_PARSERS.get(expression_type) 1273 if not parser: 1274 raise TypeError(f"No parser registered for {expression_type}") 1275 1276 try: 1277 return self._parse(parser, raw_tokens, sql) 1278 except ParseError as e: 1279 e.errors[0]["into_expression"] = expression_type 1280 errors.append(e) 1281 1282 raise ParseError( 1283 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1284 errors=merge_errors(errors), 1285 ) from errors[-1] 1286 1287 def _parse( 1288 self, 1289 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1290 raw_tokens: t.List[Token], 1291 sql: t.Optional[str] = None, 1292 ) -> t.List[t.Optional[exp.Expression]]: 1293 self.reset() 1294 self.sql = sql or "" 1295 1296 total = len(raw_tokens) 1297 chunks: t.List[t.List[Token]] = [[]] 1298 1299 for i, token in enumerate(raw_tokens): 1300 if token.token_type == TokenType.SEMICOLON: 1301 if token.comments: 1302 chunks.append([token]) 1303 1304 if i < total - 1: 1305 chunks.append([]) 1306 else: 1307 chunks[-1].append(token) 1308 1309 expressions = [] 1310 1311 for tokens in chunks: 1312 self._index = -1 1313 self._tokens = tokens 1314 self._advance() 1315 1316 expressions.append(parse_method(self)) 1317 1318 if self._index < len(self._tokens): 1319 self.raise_error("Invalid expression / Unexpected token") 1320 1321 self.check_errors() 1322 1323 return expressions 1324 1325 def check_errors(self) -> None: 1326 """Logs or raises any found errors, depending on the chosen error level setting.""" 1327 if self.error_level == ErrorLevel.WARN: 1328 for error in self.errors: 1329 logger.error(str(error)) 1330 elif self.error_level == ErrorLevel.RAISE and self.errors: 1331 raise ParseError( 1332 concat_messages(self.errors, self.max_errors), 1333 errors=merge_errors(self.errors), 1334 ) 1335 1336 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1337 """ 1338 Appends an error in the list of recorded errors or raises it, depending on the chosen 1339 error level setting. 1340 """ 1341 token = token or self._curr or self._prev or Token.string("") 1342 start = token.start 1343 end = token.end + 1 1344 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1345 highlight = self.sql[start:end] 1346 end_context = self.sql[end : end + self.error_message_context] 1347 1348 error = ParseError.new( 1349 f"{message}. Line {token.line}, Col: {token.col}.\n" 1350 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1351 description=message, 1352 line=token.line, 1353 col=token.col, 1354 start_context=start_context, 1355 highlight=highlight, 1356 end_context=end_context, 1357 ) 1358 1359 if self.error_level == ErrorLevel.IMMEDIATE: 1360 raise error 1361 1362 self.errors.append(error) 1363 1364 def expression( 1365 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1366 ) -> E: 1367 """ 1368 Creates a new, validated Expression. 1369 1370 Args: 1371 exp_class: The expression class to instantiate. 1372 comments: An optional list of comments to attach to the expression. 1373 kwargs: The arguments to set for the expression along with their respective values. 1374 1375 Returns: 1376 The target expression. 1377 """ 1378 instance = exp_class(**kwargs) 1379 instance.add_comments(comments) if comments else self._add_comments(instance) 1380 return self.validate_expression(instance) 1381 1382 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1383 if expression and self._prev_comments: 1384 expression.add_comments(self._prev_comments) 1385 self._prev_comments = None 1386 1387 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1388 """ 1389 Validates an Expression, making sure that all its mandatory arguments are set. 1390 1391 Args: 1392 expression: The expression to validate. 1393 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1394 1395 Returns: 1396 The validated expression. 1397 """ 1398 if self.error_level != ErrorLevel.IGNORE: 1399 for error_message in expression.error_messages(args): 1400 self.raise_error(error_message) 1401 1402 return expression 1403 1404 def _find_sql(self, start: Token, end: Token) -> str: 1405 return self.sql[start.start : end.end + 1] 1406 1407 def _is_connected(self) -> bool: 1408 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1409 1410 def _advance(self, times: int = 1) -> None: 1411 self._index += times 1412 self._curr = seq_get(self._tokens, self._index) 1413 self._next = seq_get(self._tokens, self._index + 1) 1414 1415 if self._index > 0: 1416 self._prev = self._tokens[self._index - 1] 1417 self._prev_comments = self._prev.comments 1418 else: 1419 self._prev = None 1420 self._prev_comments = None 1421 1422 def _retreat(self, index: int) -> None: 1423 if index != self._index: 1424 self._advance(index - self._index) 1425 1426 def _warn_unsupported(self) -> None: 1427 if len(self._tokens) <= 1: 1428 return 1429 1430 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1431 # interested in emitting a warning for the one being currently processed. 1432 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1433 1434 logger.warning( 1435 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1436 ) 1437 1438 def _parse_command(self) -> exp.Command: 1439 self._warn_unsupported() 1440 return self.expression( 1441 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1442 ) 1443 1444 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1445 """ 1446 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. This behavior can 1447 be different depending on the uset-set ErrorLevel, so _try_parse aims to solve this by setting & resetting 1448 the parser state accordingly 1449 """ 1450 index = self._index 1451 error_level = self.error_level 1452 1453 self.error_level = ErrorLevel.IMMEDIATE 1454 try: 1455 this = parse_method() 1456 except ParseError: 1457 this = None 1458 finally: 1459 if not this or retreat: 1460 self._retreat(index) 1461 self.error_level = error_level 1462 1463 return this 1464 1465 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1466 start = self._prev 1467 exists = self._parse_exists() if allow_exists else None 1468 1469 self._match(TokenType.ON) 1470 1471 materialized = self._match_text_seq("MATERIALIZED") 1472 kind = self._match_set(self.CREATABLES) and self._prev 1473 if not kind: 1474 return self._parse_as_command(start) 1475 1476 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1477 this = self._parse_user_defined_function(kind=kind.token_type) 1478 elif kind.token_type == TokenType.TABLE: 1479 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1480 elif kind.token_type == TokenType.COLUMN: 1481 this = self._parse_column() 1482 else: 1483 this = self._parse_id_var() 1484 1485 self._match(TokenType.IS) 1486 1487 return self.expression( 1488 exp.Comment, 1489 this=this, 1490 kind=kind.text, 1491 expression=self._parse_string(), 1492 exists=exists, 1493 materialized=materialized, 1494 ) 1495 1496 def _parse_to_table( 1497 self, 1498 ) -> exp.ToTableProperty: 1499 table = self._parse_table_parts(schema=True) 1500 return self.expression(exp.ToTableProperty, this=table) 1501 1502 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1503 def _parse_ttl(self) -> exp.Expression: 1504 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1505 this = self._parse_bitwise() 1506 1507 if self._match_text_seq("DELETE"): 1508 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1509 if self._match_text_seq("RECOMPRESS"): 1510 return self.expression( 1511 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1512 ) 1513 if self._match_text_seq("TO", "DISK"): 1514 return self.expression( 1515 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1516 ) 1517 if self._match_text_seq("TO", "VOLUME"): 1518 return self.expression( 1519 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1520 ) 1521 1522 return this 1523 1524 expressions = self._parse_csv(_parse_ttl_action) 1525 where = self._parse_where() 1526 group = self._parse_group() 1527 1528 aggregates = None 1529 if group and self._match(TokenType.SET): 1530 aggregates = self._parse_csv(self._parse_set_item) 1531 1532 return self.expression( 1533 exp.MergeTreeTTL, 1534 expressions=expressions, 1535 where=where, 1536 group=group, 1537 aggregates=aggregates, 1538 ) 1539 1540 def _parse_statement(self) -> t.Optional[exp.Expression]: 1541 if self._curr is None: 1542 return None 1543 1544 if self._match_set(self.STATEMENT_PARSERS): 1545 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1546 1547 if self._match_set(self.dialect.tokenizer.COMMANDS): 1548 return self._parse_command() 1549 1550 expression = self._parse_expression() 1551 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1552 return self._parse_query_modifiers(expression) 1553 1554 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1555 start = self._prev 1556 temporary = self._match(TokenType.TEMPORARY) 1557 materialized = self._match_text_seq("MATERIALIZED") 1558 1559 kind = self._match_set(self.CREATABLES) and self._prev.text 1560 if not kind: 1561 return self._parse_as_command(start) 1562 1563 if_exists = exists or self._parse_exists() 1564 table = self._parse_table_parts( 1565 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1566 ) 1567 1568 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1569 1570 if self._match(TokenType.L_PAREN, advance=False): 1571 expressions = self._parse_wrapped_csv(self._parse_types) 1572 else: 1573 expressions = None 1574 1575 return self.expression( 1576 exp.Drop, 1577 comments=start.comments, 1578 exists=if_exists, 1579 this=table, 1580 expressions=expressions, 1581 kind=kind.upper(), 1582 temporary=temporary, 1583 materialized=materialized, 1584 cascade=self._match_text_seq("CASCADE"), 1585 constraints=self._match_text_seq("CONSTRAINTS"), 1586 purge=self._match_text_seq("PURGE"), 1587 cluster=cluster, 1588 ) 1589 1590 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1591 return ( 1592 self._match_text_seq("IF") 1593 and (not not_ or self._match(TokenType.NOT)) 1594 and self._match(TokenType.EXISTS) 1595 ) 1596 1597 def _parse_create(self) -> exp.Create | exp.Command: 1598 # Note: this can't be None because we've matched a statement parser 1599 start = self._prev 1600 comments = self._prev_comments 1601 1602 replace = ( 1603 start.token_type == TokenType.REPLACE 1604 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1605 or self._match_pair(TokenType.OR, TokenType.ALTER) 1606 ) 1607 1608 unique = self._match(TokenType.UNIQUE) 1609 1610 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1611 self._advance() 1612 1613 properties = None 1614 create_token = self._match_set(self.CREATABLES) and self._prev 1615 1616 if not create_token: 1617 # exp.Properties.Location.POST_CREATE 1618 properties = self._parse_properties() 1619 create_token = self._match_set(self.CREATABLES) and self._prev 1620 1621 if not properties or not create_token: 1622 return self._parse_as_command(start) 1623 1624 exists = self._parse_exists(not_=True) 1625 this = None 1626 expression: t.Optional[exp.Expression] = None 1627 indexes = None 1628 no_schema_binding = None 1629 begin = None 1630 end = None 1631 clone = None 1632 1633 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1634 nonlocal properties 1635 if properties and temp_props: 1636 properties.expressions.extend(temp_props.expressions) 1637 elif temp_props: 1638 properties = temp_props 1639 1640 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1641 this = self._parse_user_defined_function(kind=create_token.token_type) 1642 1643 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1644 extend_props(self._parse_properties()) 1645 1646 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1647 extend_props(self._parse_properties()) 1648 1649 if not expression: 1650 if self._match(TokenType.COMMAND): 1651 expression = self._parse_as_command(self._prev) 1652 else: 1653 begin = self._match(TokenType.BEGIN) 1654 return_ = self._match_text_seq("RETURN") 1655 1656 if self._match(TokenType.STRING, advance=False): 1657 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1658 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1659 expression = self._parse_string() 1660 extend_props(self._parse_properties()) 1661 else: 1662 expression = self._parse_statement() 1663 1664 end = self._match_text_seq("END") 1665 1666 if return_: 1667 expression = self.expression(exp.Return, this=expression) 1668 elif create_token.token_type == TokenType.INDEX: 1669 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1670 if not self._match(TokenType.ON): 1671 index = self._parse_id_var() 1672 anonymous = False 1673 else: 1674 index = None 1675 anonymous = True 1676 1677 this = self._parse_index(index=index, anonymous=anonymous) 1678 elif create_token.token_type in self.DB_CREATABLES: 1679 table_parts = self._parse_table_parts( 1680 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1681 ) 1682 1683 # exp.Properties.Location.POST_NAME 1684 self._match(TokenType.COMMA) 1685 extend_props(self._parse_properties(before=True)) 1686 1687 this = self._parse_schema(this=table_parts) 1688 1689 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1690 extend_props(self._parse_properties()) 1691 1692 self._match(TokenType.ALIAS) 1693 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1694 # exp.Properties.Location.POST_ALIAS 1695 extend_props(self._parse_properties()) 1696 1697 if create_token.token_type == TokenType.SEQUENCE: 1698 expression = self._parse_types() 1699 extend_props(self._parse_properties()) 1700 else: 1701 expression = self._parse_ddl_select() 1702 1703 if create_token.token_type == TokenType.TABLE: 1704 # exp.Properties.Location.POST_EXPRESSION 1705 extend_props(self._parse_properties()) 1706 1707 indexes = [] 1708 while True: 1709 index = self._parse_index() 1710 1711 # exp.Properties.Location.POST_INDEX 1712 extend_props(self._parse_properties()) 1713 1714 if not index: 1715 break 1716 else: 1717 self._match(TokenType.COMMA) 1718 indexes.append(index) 1719 elif create_token.token_type == TokenType.VIEW: 1720 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1721 no_schema_binding = True 1722 1723 shallow = self._match_text_seq("SHALLOW") 1724 1725 if self._match_texts(self.CLONE_KEYWORDS): 1726 copy = self._prev.text.lower() == "copy" 1727 clone = self.expression( 1728 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1729 ) 1730 1731 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1732 return self._parse_as_command(start) 1733 1734 return self.expression( 1735 exp.Create, 1736 comments=comments, 1737 this=this, 1738 kind=create_token.text.upper(), 1739 replace=replace, 1740 unique=unique, 1741 expression=expression, 1742 exists=exists, 1743 properties=properties, 1744 indexes=indexes, 1745 no_schema_binding=no_schema_binding, 1746 begin=begin, 1747 end=end, 1748 clone=clone, 1749 ) 1750 1751 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1752 seq = exp.SequenceProperties() 1753 1754 options = [] 1755 index = self._index 1756 1757 while self._curr: 1758 self._match(TokenType.COMMA) 1759 if self._match_text_seq("INCREMENT"): 1760 self._match_text_seq("BY") 1761 self._match_text_seq("=") 1762 seq.set("increment", self._parse_term()) 1763 elif self._match_text_seq("MINVALUE"): 1764 seq.set("minvalue", self._parse_term()) 1765 elif self._match_text_seq("MAXVALUE"): 1766 seq.set("maxvalue", self._parse_term()) 1767 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1768 self._match_text_seq("=") 1769 seq.set("start", self._parse_term()) 1770 elif self._match_text_seq("CACHE"): 1771 # T-SQL allows empty CACHE which is initialized dynamically 1772 seq.set("cache", self._parse_number() or True) 1773 elif self._match_text_seq("OWNED", "BY"): 1774 # "OWNED BY NONE" is the default 1775 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1776 else: 1777 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1778 if opt: 1779 options.append(opt) 1780 else: 1781 break 1782 1783 seq.set("options", options if options else None) 1784 return None if self._index == index else seq 1785 1786 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1787 # only used for teradata currently 1788 self._match(TokenType.COMMA) 1789 1790 kwargs = { 1791 "no": self._match_text_seq("NO"), 1792 "dual": self._match_text_seq("DUAL"), 1793 "before": self._match_text_seq("BEFORE"), 1794 "default": self._match_text_seq("DEFAULT"), 1795 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1796 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1797 "after": self._match_text_seq("AFTER"), 1798 "minimum": self._match_texts(("MIN", "MINIMUM")), 1799 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1800 } 1801 1802 if self._match_texts(self.PROPERTY_PARSERS): 1803 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1804 try: 1805 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1806 except TypeError: 1807 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1808 1809 return None 1810 1811 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1812 return self._parse_wrapped_csv(self._parse_property) 1813 1814 def _parse_property(self) -> t.Optional[exp.Expression]: 1815 if self._match_texts(self.PROPERTY_PARSERS): 1816 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1817 1818 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1819 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1820 1821 if self._match_text_seq("COMPOUND", "SORTKEY"): 1822 return self._parse_sortkey(compound=True) 1823 1824 if self._match_text_seq("SQL", "SECURITY"): 1825 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1826 1827 index = self._index 1828 key = self._parse_column() 1829 1830 if not self._match(TokenType.EQ): 1831 self._retreat(index) 1832 return self._parse_sequence_properties() 1833 1834 return self.expression( 1835 exp.Property, 1836 this=key.to_dot() if isinstance(key, exp.Column) else key, 1837 value=self._parse_bitwise() or self._parse_var(any_token=True), 1838 ) 1839 1840 def _parse_stored(self) -> exp.FileFormatProperty: 1841 self._match(TokenType.ALIAS) 1842 1843 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1844 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1845 1846 return self.expression( 1847 exp.FileFormatProperty, 1848 this=( 1849 self.expression( 1850 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1851 ) 1852 if input_format or output_format 1853 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1854 ), 1855 ) 1856 1857 def _parse_unquoted_field(self): 1858 field = self._parse_field() 1859 if isinstance(field, exp.Identifier) and not field.quoted: 1860 field = exp.var(field) 1861 1862 return field 1863 1864 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1865 self._match(TokenType.EQ) 1866 self._match(TokenType.ALIAS) 1867 1868 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1869 1870 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1871 properties = [] 1872 while True: 1873 if before: 1874 prop = self._parse_property_before() 1875 else: 1876 prop = self._parse_property() 1877 if not prop: 1878 break 1879 for p in ensure_list(prop): 1880 properties.append(p) 1881 1882 if properties: 1883 return self.expression(exp.Properties, expressions=properties) 1884 1885 return None 1886 1887 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1888 return self.expression( 1889 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1890 ) 1891 1892 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1893 if self._index >= 2: 1894 pre_volatile_token = self._tokens[self._index - 2] 1895 else: 1896 pre_volatile_token = None 1897 1898 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1899 return exp.VolatileProperty() 1900 1901 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1902 1903 def _parse_retention_period(self) -> exp.Var: 1904 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 1905 number = self._parse_number() 1906 number_str = f"{number} " if number else "" 1907 unit = self._parse_var(any_token=True) 1908 return exp.var(f"{number_str}{unit}") 1909 1910 def _parse_system_versioning_property( 1911 self, with_: bool = False 1912 ) -> exp.WithSystemVersioningProperty: 1913 self._match(TokenType.EQ) 1914 prop = self.expression( 1915 exp.WithSystemVersioningProperty, 1916 **{ # type: ignore 1917 "on": True, 1918 "with": with_, 1919 }, 1920 ) 1921 1922 if self._match_text_seq("OFF"): 1923 prop.set("on", False) 1924 return prop 1925 1926 self._match(TokenType.ON) 1927 if self._match(TokenType.L_PAREN): 1928 while self._curr and not self._match(TokenType.R_PAREN): 1929 if self._match_text_seq("HISTORY_TABLE", "="): 1930 prop.set("this", self._parse_table_parts()) 1931 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 1932 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 1933 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 1934 prop.set("retention_period", self._parse_retention_period()) 1935 1936 self._match(TokenType.COMMA) 1937 1938 return prop 1939 1940 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 1941 self._match(TokenType.EQ) 1942 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 1943 prop = self.expression(exp.DataDeletionProperty, on=on) 1944 1945 if self._match(TokenType.L_PAREN): 1946 while self._curr and not self._match(TokenType.R_PAREN): 1947 if self._match_text_seq("FILTER_COLUMN", "="): 1948 prop.set("filter_column", self._parse_column()) 1949 elif self._match_text_seq("RETENTION_PERIOD", "="): 1950 prop.set("retention_period", self._parse_retention_period()) 1951 1952 self._match(TokenType.COMMA) 1953 1954 return prop 1955 1956 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1957 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 1958 prop = self._parse_system_versioning_property(with_=True) 1959 self._match_r_paren() 1960 return prop 1961 1962 if self._match(TokenType.L_PAREN, advance=False): 1963 return self._parse_wrapped_properties() 1964 1965 if self._match_text_seq("JOURNAL"): 1966 return self._parse_withjournaltable() 1967 1968 if self._match_texts(self.VIEW_ATTRIBUTES): 1969 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1970 1971 if self._match_text_seq("DATA"): 1972 return self._parse_withdata(no=False) 1973 elif self._match_text_seq("NO", "DATA"): 1974 return self._parse_withdata(no=True) 1975 1976 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 1977 return self._parse_serde_properties(with_=True) 1978 1979 if not self._next: 1980 return None 1981 1982 return self._parse_withisolatedloading() 1983 1984 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1985 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1986 self._match(TokenType.EQ) 1987 1988 user = self._parse_id_var() 1989 self._match(TokenType.PARAMETER) 1990 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1991 1992 if not user or not host: 1993 return None 1994 1995 return exp.DefinerProperty(this=f"{user}@{host}") 1996 1997 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1998 self._match(TokenType.TABLE) 1999 self._match(TokenType.EQ) 2000 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2001 2002 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2003 return self.expression(exp.LogProperty, no=no) 2004 2005 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2006 return self.expression(exp.JournalProperty, **kwargs) 2007 2008 def _parse_checksum(self) -> exp.ChecksumProperty: 2009 self._match(TokenType.EQ) 2010 2011 on = None 2012 if self._match(TokenType.ON): 2013 on = True 2014 elif self._match_text_seq("OFF"): 2015 on = False 2016 2017 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2018 2019 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2020 return self.expression( 2021 exp.Cluster, 2022 expressions=( 2023 self._parse_wrapped_csv(self._parse_ordered) 2024 if wrapped 2025 else self._parse_csv(self._parse_ordered) 2026 ), 2027 ) 2028 2029 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2030 self._match_text_seq("BY") 2031 2032 self._match_l_paren() 2033 expressions = self._parse_csv(self._parse_column) 2034 self._match_r_paren() 2035 2036 if self._match_text_seq("SORTED", "BY"): 2037 self._match_l_paren() 2038 sorted_by = self._parse_csv(self._parse_ordered) 2039 self._match_r_paren() 2040 else: 2041 sorted_by = None 2042 2043 self._match(TokenType.INTO) 2044 buckets = self._parse_number() 2045 self._match_text_seq("BUCKETS") 2046 2047 return self.expression( 2048 exp.ClusteredByProperty, 2049 expressions=expressions, 2050 sorted_by=sorted_by, 2051 buckets=buckets, 2052 ) 2053 2054 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2055 if not self._match_text_seq("GRANTS"): 2056 self._retreat(self._index - 1) 2057 return None 2058 2059 return self.expression(exp.CopyGrantsProperty) 2060 2061 def _parse_freespace(self) -> exp.FreespaceProperty: 2062 self._match(TokenType.EQ) 2063 return self.expression( 2064 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2065 ) 2066 2067 def _parse_mergeblockratio( 2068 self, no: bool = False, default: bool = False 2069 ) -> exp.MergeBlockRatioProperty: 2070 if self._match(TokenType.EQ): 2071 return self.expression( 2072 exp.MergeBlockRatioProperty, 2073 this=self._parse_number(), 2074 percent=self._match(TokenType.PERCENT), 2075 ) 2076 2077 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2078 2079 def _parse_datablocksize( 2080 self, 2081 default: t.Optional[bool] = None, 2082 minimum: t.Optional[bool] = None, 2083 maximum: t.Optional[bool] = None, 2084 ) -> exp.DataBlocksizeProperty: 2085 self._match(TokenType.EQ) 2086 size = self._parse_number() 2087 2088 units = None 2089 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2090 units = self._prev.text 2091 2092 return self.expression( 2093 exp.DataBlocksizeProperty, 2094 size=size, 2095 units=units, 2096 default=default, 2097 minimum=minimum, 2098 maximum=maximum, 2099 ) 2100 2101 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2102 self._match(TokenType.EQ) 2103 always = self._match_text_seq("ALWAYS") 2104 manual = self._match_text_seq("MANUAL") 2105 never = self._match_text_seq("NEVER") 2106 default = self._match_text_seq("DEFAULT") 2107 2108 autotemp = None 2109 if self._match_text_seq("AUTOTEMP"): 2110 autotemp = self._parse_schema() 2111 2112 return self.expression( 2113 exp.BlockCompressionProperty, 2114 always=always, 2115 manual=manual, 2116 never=never, 2117 default=default, 2118 autotemp=autotemp, 2119 ) 2120 2121 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2122 index = self._index 2123 no = self._match_text_seq("NO") 2124 concurrent = self._match_text_seq("CONCURRENT") 2125 2126 if not self._match_text_seq("ISOLATED", "LOADING"): 2127 self._retreat(index) 2128 return None 2129 2130 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2131 return self.expression( 2132 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2133 ) 2134 2135 def _parse_locking(self) -> exp.LockingProperty: 2136 if self._match(TokenType.TABLE): 2137 kind = "TABLE" 2138 elif self._match(TokenType.VIEW): 2139 kind = "VIEW" 2140 elif self._match(TokenType.ROW): 2141 kind = "ROW" 2142 elif self._match_text_seq("DATABASE"): 2143 kind = "DATABASE" 2144 else: 2145 kind = None 2146 2147 if kind in ("DATABASE", "TABLE", "VIEW"): 2148 this = self._parse_table_parts() 2149 else: 2150 this = None 2151 2152 if self._match(TokenType.FOR): 2153 for_or_in = "FOR" 2154 elif self._match(TokenType.IN): 2155 for_or_in = "IN" 2156 else: 2157 for_or_in = None 2158 2159 if self._match_text_seq("ACCESS"): 2160 lock_type = "ACCESS" 2161 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2162 lock_type = "EXCLUSIVE" 2163 elif self._match_text_seq("SHARE"): 2164 lock_type = "SHARE" 2165 elif self._match_text_seq("READ"): 2166 lock_type = "READ" 2167 elif self._match_text_seq("WRITE"): 2168 lock_type = "WRITE" 2169 elif self._match_text_seq("CHECKSUM"): 2170 lock_type = "CHECKSUM" 2171 else: 2172 lock_type = None 2173 2174 override = self._match_text_seq("OVERRIDE") 2175 2176 return self.expression( 2177 exp.LockingProperty, 2178 this=this, 2179 kind=kind, 2180 for_or_in=for_or_in, 2181 lock_type=lock_type, 2182 override=override, 2183 ) 2184 2185 def _parse_partition_by(self) -> t.List[exp.Expression]: 2186 if self._match(TokenType.PARTITION_BY): 2187 return self._parse_csv(self._parse_conjunction) 2188 return [] 2189 2190 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2191 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2192 if self._match_text_seq("MINVALUE"): 2193 return exp.var("MINVALUE") 2194 if self._match_text_seq("MAXVALUE"): 2195 return exp.var("MAXVALUE") 2196 return self._parse_bitwise() 2197 2198 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2199 expression = None 2200 from_expressions = None 2201 to_expressions = None 2202 2203 if self._match(TokenType.IN): 2204 this = self._parse_wrapped_csv(self._parse_bitwise) 2205 elif self._match(TokenType.FROM): 2206 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2207 self._match_text_seq("TO") 2208 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2209 elif self._match_text_seq("WITH", "(", "MODULUS"): 2210 this = self._parse_number() 2211 self._match_text_seq(",", "REMAINDER") 2212 expression = self._parse_number() 2213 self._match_r_paren() 2214 else: 2215 self.raise_error("Failed to parse partition bound spec.") 2216 2217 return self.expression( 2218 exp.PartitionBoundSpec, 2219 this=this, 2220 expression=expression, 2221 from_expressions=from_expressions, 2222 to_expressions=to_expressions, 2223 ) 2224 2225 # https://www.postgresql.org/docs/current/sql-createtable.html 2226 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2227 if not self._match_text_seq("OF"): 2228 self._retreat(self._index - 1) 2229 return None 2230 2231 this = self._parse_table(schema=True) 2232 2233 if self._match(TokenType.DEFAULT): 2234 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2235 elif self._match_text_seq("FOR", "VALUES"): 2236 expression = self._parse_partition_bound_spec() 2237 else: 2238 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2239 2240 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2241 2242 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2243 self._match(TokenType.EQ) 2244 return self.expression( 2245 exp.PartitionedByProperty, 2246 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2247 ) 2248 2249 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2250 if self._match_text_seq("AND", "STATISTICS"): 2251 statistics = True 2252 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2253 statistics = False 2254 else: 2255 statistics = None 2256 2257 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2258 2259 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2260 if self._match_text_seq("SQL"): 2261 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2262 return None 2263 2264 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2265 if self._match_text_seq("SQL", "DATA"): 2266 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2267 return None 2268 2269 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2270 if self._match_text_seq("PRIMARY", "INDEX"): 2271 return exp.NoPrimaryIndexProperty() 2272 if self._match_text_seq("SQL"): 2273 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2274 return None 2275 2276 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2277 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2278 return exp.OnCommitProperty() 2279 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2280 return exp.OnCommitProperty(delete=True) 2281 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2282 2283 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2284 if self._match_text_seq("SQL", "DATA"): 2285 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2286 return None 2287 2288 def _parse_distkey(self) -> exp.DistKeyProperty: 2289 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2290 2291 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2292 table = self._parse_table(schema=True) 2293 2294 options = [] 2295 while self._match_texts(("INCLUDING", "EXCLUDING")): 2296 this = self._prev.text.upper() 2297 2298 id_var = self._parse_id_var() 2299 if not id_var: 2300 return None 2301 2302 options.append( 2303 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2304 ) 2305 2306 return self.expression(exp.LikeProperty, this=table, expressions=options) 2307 2308 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2309 return self.expression( 2310 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2311 ) 2312 2313 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2314 self._match(TokenType.EQ) 2315 return self.expression( 2316 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2317 ) 2318 2319 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2320 self._match_text_seq("WITH", "CONNECTION") 2321 return self.expression( 2322 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2323 ) 2324 2325 def _parse_returns(self) -> exp.ReturnsProperty: 2326 value: t.Optional[exp.Expression] 2327 null = None 2328 is_table = self._match(TokenType.TABLE) 2329 2330 if is_table: 2331 if self._match(TokenType.LT): 2332 value = self.expression( 2333 exp.Schema, 2334 this="TABLE", 2335 expressions=self._parse_csv(self._parse_struct_types), 2336 ) 2337 if not self._match(TokenType.GT): 2338 self.raise_error("Expecting >") 2339 else: 2340 value = self._parse_schema(exp.var("TABLE")) 2341 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2342 null = True 2343 value = None 2344 else: 2345 value = self._parse_types() 2346 2347 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2348 2349 def _parse_describe(self) -> exp.Describe: 2350 kind = self._match_set(self.CREATABLES) and self._prev.text 2351 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2352 if self._match(TokenType.DOT): 2353 style = None 2354 self._retreat(self._index - 2) 2355 this = self._parse_table(schema=True) 2356 properties = self._parse_properties() 2357 expressions = properties.expressions if properties else None 2358 return self.expression( 2359 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2360 ) 2361 2362 def _parse_insert(self) -> exp.Insert: 2363 comments = ensure_list(self._prev_comments) 2364 hint = self._parse_hint() 2365 overwrite = self._match(TokenType.OVERWRITE) 2366 ignore = self._match(TokenType.IGNORE) 2367 local = self._match_text_seq("LOCAL") 2368 alternative = None 2369 is_function = None 2370 2371 if self._match_text_seq("DIRECTORY"): 2372 this: t.Optional[exp.Expression] = self.expression( 2373 exp.Directory, 2374 this=self._parse_var_or_string(), 2375 local=local, 2376 row_format=self._parse_row_format(match_row=True), 2377 ) 2378 else: 2379 if self._match(TokenType.OR): 2380 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2381 2382 self._match(TokenType.INTO) 2383 comments += ensure_list(self._prev_comments) 2384 self._match(TokenType.TABLE) 2385 is_function = self._match(TokenType.FUNCTION) 2386 2387 this = ( 2388 self._parse_table(schema=True, parse_partition=True) 2389 if not is_function 2390 else self._parse_function() 2391 ) 2392 2393 returning = self._parse_returning() 2394 2395 return self.expression( 2396 exp.Insert, 2397 comments=comments, 2398 hint=hint, 2399 is_function=is_function, 2400 this=this, 2401 stored=self._match_text_seq("STORED") and self._parse_stored(), 2402 by_name=self._match_text_seq("BY", "NAME"), 2403 exists=self._parse_exists(), 2404 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2405 and self._parse_conjunction(), 2406 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2407 conflict=self._parse_on_conflict(), 2408 returning=returning or self._parse_returning(), 2409 overwrite=overwrite, 2410 alternative=alternative, 2411 ignore=ignore, 2412 ) 2413 2414 def _parse_kill(self) -> exp.Kill: 2415 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2416 2417 return self.expression( 2418 exp.Kill, 2419 this=self._parse_primary(), 2420 kind=kind, 2421 ) 2422 2423 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2424 conflict = self._match_text_seq("ON", "CONFLICT") 2425 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2426 2427 if not conflict and not duplicate: 2428 return None 2429 2430 conflict_keys = None 2431 constraint = None 2432 2433 if conflict: 2434 if self._match_text_seq("ON", "CONSTRAINT"): 2435 constraint = self._parse_id_var() 2436 elif self._match(TokenType.L_PAREN): 2437 conflict_keys = self._parse_csv(self._parse_id_var) 2438 self._match_r_paren() 2439 2440 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2441 if self._prev.token_type == TokenType.UPDATE: 2442 self._match(TokenType.SET) 2443 expressions = self._parse_csv(self._parse_equality) 2444 else: 2445 expressions = None 2446 2447 return self.expression( 2448 exp.OnConflict, 2449 duplicate=duplicate, 2450 expressions=expressions, 2451 action=action, 2452 conflict_keys=conflict_keys, 2453 constraint=constraint, 2454 ) 2455 2456 def _parse_returning(self) -> t.Optional[exp.Returning]: 2457 if not self._match(TokenType.RETURNING): 2458 return None 2459 return self.expression( 2460 exp.Returning, 2461 expressions=self._parse_csv(self._parse_expression), 2462 into=self._match(TokenType.INTO) and self._parse_table_part(), 2463 ) 2464 2465 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2466 if not self._match(TokenType.FORMAT): 2467 return None 2468 return self._parse_row_format() 2469 2470 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2471 index = self._index 2472 with_ = with_ or self._match_text_seq("WITH") 2473 2474 if not self._match(TokenType.SERDE_PROPERTIES): 2475 self._retreat(index) 2476 return None 2477 return self.expression( 2478 exp.SerdeProperties, 2479 **{ # type: ignore 2480 "expressions": self._parse_wrapped_properties(), 2481 "with": with_, 2482 }, 2483 ) 2484 2485 def _parse_row_format( 2486 self, match_row: bool = False 2487 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2488 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2489 return None 2490 2491 if self._match_text_seq("SERDE"): 2492 this = self._parse_string() 2493 2494 serde_properties = self._parse_serde_properties() 2495 2496 return self.expression( 2497 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2498 ) 2499 2500 self._match_text_seq("DELIMITED") 2501 2502 kwargs = {} 2503 2504 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2505 kwargs["fields"] = self._parse_string() 2506 if self._match_text_seq("ESCAPED", "BY"): 2507 kwargs["escaped"] = self._parse_string() 2508 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2509 kwargs["collection_items"] = self._parse_string() 2510 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2511 kwargs["map_keys"] = self._parse_string() 2512 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2513 kwargs["lines"] = self._parse_string() 2514 if self._match_text_seq("NULL", "DEFINED", "AS"): 2515 kwargs["null"] = self._parse_string() 2516 2517 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2518 2519 def _parse_load(self) -> exp.LoadData | exp.Command: 2520 if self._match_text_seq("DATA"): 2521 local = self._match_text_seq("LOCAL") 2522 self._match_text_seq("INPATH") 2523 inpath = self._parse_string() 2524 overwrite = self._match(TokenType.OVERWRITE) 2525 self._match_pair(TokenType.INTO, TokenType.TABLE) 2526 2527 return self.expression( 2528 exp.LoadData, 2529 this=self._parse_table(schema=True), 2530 local=local, 2531 overwrite=overwrite, 2532 inpath=inpath, 2533 partition=self._parse_partition(), 2534 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2535 serde=self._match_text_seq("SERDE") and self._parse_string(), 2536 ) 2537 return self._parse_as_command(self._prev) 2538 2539 def _parse_delete(self) -> exp.Delete: 2540 # This handles MySQL's "Multiple-Table Syntax" 2541 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2542 tables = None 2543 comments = self._prev_comments 2544 if not self._match(TokenType.FROM, advance=False): 2545 tables = self._parse_csv(self._parse_table) or None 2546 2547 returning = self._parse_returning() 2548 2549 return self.expression( 2550 exp.Delete, 2551 comments=comments, 2552 tables=tables, 2553 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2554 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2555 where=self._parse_where(), 2556 returning=returning or self._parse_returning(), 2557 limit=self._parse_limit(), 2558 ) 2559 2560 def _parse_update(self) -> exp.Update: 2561 comments = self._prev_comments 2562 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2563 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2564 returning = self._parse_returning() 2565 return self.expression( 2566 exp.Update, 2567 comments=comments, 2568 **{ # type: ignore 2569 "this": this, 2570 "expressions": expressions, 2571 "from": self._parse_from(joins=True), 2572 "where": self._parse_where(), 2573 "returning": returning or self._parse_returning(), 2574 "order": self._parse_order(), 2575 "limit": self._parse_limit(), 2576 }, 2577 ) 2578 2579 def _parse_uncache(self) -> exp.Uncache: 2580 if not self._match(TokenType.TABLE): 2581 self.raise_error("Expecting TABLE after UNCACHE") 2582 2583 return self.expression( 2584 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2585 ) 2586 2587 def _parse_cache(self) -> exp.Cache: 2588 lazy = self._match_text_seq("LAZY") 2589 self._match(TokenType.TABLE) 2590 table = self._parse_table(schema=True) 2591 2592 options = [] 2593 if self._match_text_seq("OPTIONS"): 2594 self._match_l_paren() 2595 k = self._parse_string() 2596 self._match(TokenType.EQ) 2597 v = self._parse_string() 2598 options = [k, v] 2599 self._match_r_paren() 2600 2601 self._match(TokenType.ALIAS) 2602 return self.expression( 2603 exp.Cache, 2604 this=table, 2605 lazy=lazy, 2606 options=options, 2607 expression=self._parse_select(nested=True), 2608 ) 2609 2610 def _parse_partition(self) -> t.Optional[exp.Partition]: 2611 if not self._match(TokenType.PARTITION): 2612 return None 2613 2614 return self.expression( 2615 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2616 ) 2617 2618 def _parse_value(self) -> t.Optional[exp.Tuple]: 2619 if self._match(TokenType.L_PAREN): 2620 expressions = self._parse_csv(self._parse_expression) 2621 self._match_r_paren() 2622 return self.expression(exp.Tuple, expressions=expressions) 2623 2624 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2625 expression = self._parse_expression() 2626 if expression: 2627 return self.expression(exp.Tuple, expressions=[expression]) 2628 return None 2629 2630 def _parse_projections(self) -> t.List[exp.Expression]: 2631 return self._parse_expressions() 2632 2633 def _parse_select( 2634 self, 2635 nested: bool = False, 2636 table: bool = False, 2637 parse_subquery_alias: bool = True, 2638 parse_set_operation: bool = True, 2639 ) -> t.Optional[exp.Expression]: 2640 cte = self._parse_with() 2641 2642 if cte: 2643 this = self._parse_statement() 2644 2645 if not this: 2646 self.raise_error("Failed to parse any statement following CTE") 2647 return cte 2648 2649 if "with" in this.arg_types: 2650 this.set("with", cte) 2651 else: 2652 self.raise_error(f"{this.key} does not support CTE") 2653 this = cte 2654 2655 return this 2656 2657 # duckdb supports leading with FROM x 2658 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2659 2660 if self._match(TokenType.SELECT): 2661 comments = self._prev_comments 2662 2663 hint = self._parse_hint() 2664 all_ = self._match(TokenType.ALL) 2665 distinct = self._match_set(self.DISTINCT_TOKENS) 2666 2667 kind = ( 2668 self._match(TokenType.ALIAS) 2669 and self._match_texts(("STRUCT", "VALUE")) 2670 and self._prev.text.upper() 2671 ) 2672 2673 if distinct: 2674 distinct = self.expression( 2675 exp.Distinct, 2676 on=self._parse_value() if self._match(TokenType.ON) else None, 2677 ) 2678 2679 if all_ and distinct: 2680 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2681 2682 limit = self._parse_limit(top=True) 2683 projections = self._parse_projections() 2684 2685 this = self.expression( 2686 exp.Select, 2687 kind=kind, 2688 hint=hint, 2689 distinct=distinct, 2690 expressions=projections, 2691 limit=limit, 2692 ) 2693 this.comments = comments 2694 2695 into = self._parse_into() 2696 if into: 2697 this.set("into", into) 2698 2699 if not from_: 2700 from_ = self._parse_from() 2701 2702 if from_: 2703 this.set("from", from_) 2704 2705 this = self._parse_query_modifiers(this) 2706 elif (table or nested) and self._match(TokenType.L_PAREN): 2707 if self._match(TokenType.PIVOT): 2708 this = self._parse_simplified_pivot() 2709 elif self._match(TokenType.FROM): 2710 this = exp.select("*").from_( 2711 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2712 ) 2713 else: 2714 this = ( 2715 self._parse_table() 2716 if table 2717 else self._parse_select(nested=True, parse_set_operation=False) 2718 ) 2719 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2720 2721 self._match_r_paren() 2722 2723 # We return early here so that the UNION isn't attached to the subquery by the 2724 # following call to _parse_set_operations, but instead becomes the parent node 2725 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2726 elif self._match(TokenType.VALUES, advance=False): 2727 this = self._parse_derived_table_values() 2728 elif from_: 2729 this = exp.select("*").from_(from_.this, copy=False) 2730 else: 2731 this = None 2732 2733 if parse_set_operation: 2734 return self._parse_set_operations(this) 2735 return this 2736 2737 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2738 if not skip_with_token and not self._match(TokenType.WITH): 2739 return None 2740 2741 comments = self._prev_comments 2742 recursive = self._match(TokenType.RECURSIVE) 2743 2744 expressions = [] 2745 while True: 2746 expressions.append(self._parse_cte()) 2747 2748 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2749 break 2750 else: 2751 self._match(TokenType.WITH) 2752 2753 return self.expression( 2754 exp.With, comments=comments, expressions=expressions, recursive=recursive 2755 ) 2756 2757 def _parse_cte(self) -> exp.CTE: 2758 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2759 if not alias or not alias.this: 2760 self.raise_error("Expected CTE to have alias") 2761 2762 self._match(TokenType.ALIAS) 2763 2764 if self._match_text_seq("NOT", "MATERIALIZED"): 2765 materialized = False 2766 elif self._match_text_seq("MATERIALIZED"): 2767 materialized = True 2768 else: 2769 materialized = None 2770 2771 return self.expression( 2772 exp.CTE, 2773 this=self._parse_wrapped(self._parse_statement), 2774 alias=alias, 2775 materialized=materialized, 2776 ) 2777 2778 def _parse_table_alias( 2779 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2780 ) -> t.Optional[exp.TableAlias]: 2781 any_token = self._match(TokenType.ALIAS) 2782 alias = ( 2783 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2784 or self._parse_string_as_identifier() 2785 ) 2786 2787 index = self._index 2788 if self._match(TokenType.L_PAREN): 2789 columns = self._parse_csv(self._parse_function_parameter) 2790 self._match_r_paren() if columns else self._retreat(index) 2791 else: 2792 columns = None 2793 2794 if not alias and not columns: 2795 return None 2796 2797 return self.expression(exp.TableAlias, this=alias, columns=columns) 2798 2799 def _parse_subquery( 2800 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2801 ) -> t.Optional[exp.Subquery]: 2802 if not this: 2803 return None 2804 2805 return self.expression( 2806 exp.Subquery, 2807 this=this, 2808 pivots=self._parse_pivots(), 2809 alias=self._parse_table_alias() if parse_alias else None, 2810 ) 2811 2812 def _implicit_unnests_to_explicit(self, this: E) -> E: 2813 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2814 2815 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2816 for i, join in enumerate(this.args.get("joins") or []): 2817 table = join.this 2818 normalized_table = table.copy() 2819 normalized_table.meta["maybe_column"] = True 2820 normalized_table = _norm(normalized_table, dialect=self.dialect) 2821 2822 if isinstance(table, exp.Table) and not join.args.get("on"): 2823 if normalized_table.parts[0].name in refs: 2824 table_as_column = table.to_column() 2825 unnest = exp.Unnest(expressions=[table_as_column]) 2826 2827 # Table.to_column creates a parent Alias node that we want to convert to 2828 # a TableAlias and attach to the Unnest, so it matches the parser's output 2829 if isinstance(table.args.get("alias"), exp.TableAlias): 2830 table_as_column.replace(table_as_column.this) 2831 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2832 2833 table.replace(unnest) 2834 2835 refs.add(normalized_table.alias_or_name) 2836 2837 return this 2838 2839 def _parse_query_modifiers( 2840 self, this: t.Optional[exp.Expression] 2841 ) -> t.Optional[exp.Expression]: 2842 if isinstance(this, (exp.Query, exp.Table)): 2843 for join in self._parse_joins(): 2844 this.append("joins", join) 2845 for lateral in iter(self._parse_lateral, None): 2846 this.append("laterals", lateral) 2847 2848 while True: 2849 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2850 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2851 key, expression = parser(self) 2852 2853 if expression: 2854 this.set(key, expression) 2855 if key == "limit": 2856 offset = expression.args.pop("offset", None) 2857 2858 if offset: 2859 offset = exp.Offset(expression=offset) 2860 this.set("offset", offset) 2861 2862 limit_by_expressions = expression.expressions 2863 expression.set("expressions", None) 2864 offset.set("expressions", limit_by_expressions) 2865 continue 2866 break 2867 2868 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2869 this = self._implicit_unnests_to_explicit(this) 2870 2871 return this 2872 2873 def _parse_hint(self) -> t.Optional[exp.Hint]: 2874 if self._match(TokenType.HINT): 2875 hints = [] 2876 for hint in iter( 2877 lambda: self._parse_csv( 2878 lambda: self._parse_function() or self._parse_var(upper=True) 2879 ), 2880 [], 2881 ): 2882 hints.extend(hint) 2883 2884 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2885 self.raise_error("Expected */ after HINT") 2886 2887 return self.expression(exp.Hint, expressions=hints) 2888 2889 return None 2890 2891 def _parse_into(self) -> t.Optional[exp.Into]: 2892 if not self._match(TokenType.INTO): 2893 return None 2894 2895 temp = self._match(TokenType.TEMPORARY) 2896 unlogged = self._match_text_seq("UNLOGGED") 2897 self._match(TokenType.TABLE) 2898 2899 return self.expression( 2900 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2901 ) 2902 2903 def _parse_from( 2904 self, joins: bool = False, skip_from_token: bool = False 2905 ) -> t.Optional[exp.From]: 2906 if not skip_from_token and not self._match(TokenType.FROM): 2907 return None 2908 2909 return self.expression( 2910 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2911 ) 2912 2913 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2914 return self.expression( 2915 exp.MatchRecognizeMeasure, 2916 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2917 this=self._parse_expression(), 2918 ) 2919 2920 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2921 if not self._match(TokenType.MATCH_RECOGNIZE): 2922 return None 2923 2924 self._match_l_paren() 2925 2926 partition = self._parse_partition_by() 2927 order = self._parse_order() 2928 2929 measures = ( 2930 self._parse_csv(self._parse_match_recognize_measure) 2931 if self._match_text_seq("MEASURES") 2932 else None 2933 ) 2934 2935 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2936 rows = exp.var("ONE ROW PER MATCH") 2937 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2938 text = "ALL ROWS PER MATCH" 2939 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2940 text += " SHOW EMPTY MATCHES" 2941 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2942 text += " OMIT EMPTY MATCHES" 2943 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2944 text += " WITH UNMATCHED ROWS" 2945 rows = exp.var(text) 2946 else: 2947 rows = None 2948 2949 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2950 text = "AFTER MATCH SKIP" 2951 if self._match_text_seq("PAST", "LAST", "ROW"): 2952 text += " PAST LAST ROW" 2953 elif self._match_text_seq("TO", "NEXT", "ROW"): 2954 text += " TO NEXT ROW" 2955 elif self._match_text_seq("TO", "FIRST"): 2956 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2957 elif self._match_text_seq("TO", "LAST"): 2958 text += f" TO LAST {self._advance_any().text}" # type: ignore 2959 after = exp.var(text) 2960 else: 2961 after = None 2962 2963 if self._match_text_seq("PATTERN"): 2964 self._match_l_paren() 2965 2966 if not self._curr: 2967 self.raise_error("Expecting )", self._curr) 2968 2969 paren = 1 2970 start = self._curr 2971 2972 while self._curr and paren > 0: 2973 if self._curr.token_type == TokenType.L_PAREN: 2974 paren += 1 2975 if self._curr.token_type == TokenType.R_PAREN: 2976 paren -= 1 2977 2978 end = self._prev 2979 self._advance() 2980 2981 if paren > 0: 2982 self.raise_error("Expecting )", self._curr) 2983 2984 pattern = exp.var(self._find_sql(start, end)) 2985 else: 2986 pattern = None 2987 2988 define = ( 2989 self._parse_csv(self._parse_name_as_expression) 2990 if self._match_text_seq("DEFINE") 2991 else None 2992 ) 2993 2994 self._match_r_paren() 2995 2996 return self.expression( 2997 exp.MatchRecognize, 2998 partition_by=partition, 2999 order=order, 3000 measures=measures, 3001 rows=rows, 3002 after=after, 3003 pattern=pattern, 3004 define=define, 3005 alias=self._parse_table_alias(), 3006 ) 3007 3008 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3009 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3010 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3011 cross_apply = False 3012 3013 if cross_apply is not None: 3014 this = self._parse_select(table=True) 3015 view = None 3016 outer = None 3017 elif self._match(TokenType.LATERAL): 3018 this = self._parse_select(table=True) 3019 view = self._match(TokenType.VIEW) 3020 outer = self._match(TokenType.OUTER) 3021 else: 3022 return None 3023 3024 if not this: 3025 this = ( 3026 self._parse_unnest() 3027 or self._parse_function() 3028 or self._parse_id_var(any_token=False) 3029 ) 3030 3031 while self._match(TokenType.DOT): 3032 this = exp.Dot( 3033 this=this, 3034 expression=self._parse_function() or self._parse_id_var(any_token=False), 3035 ) 3036 3037 if view: 3038 table = self._parse_id_var(any_token=False) 3039 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3040 table_alias: t.Optional[exp.TableAlias] = self.expression( 3041 exp.TableAlias, this=table, columns=columns 3042 ) 3043 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3044 # We move the alias from the lateral's child node to the lateral itself 3045 table_alias = this.args["alias"].pop() 3046 else: 3047 table_alias = self._parse_table_alias() 3048 3049 return self.expression( 3050 exp.Lateral, 3051 this=this, 3052 view=view, 3053 outer=outer, 3054 alias=table_alias, 3055 cross_apply=cross_apply, 3056 ) 3057 3058 def _parse_join_parts( 3059 self, 3060 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3061 return ( 3062 self._match_set(self.JOIN_METHODS) and self._prev, 3063 self._match_set(self.JOIN_SIDES) and self._prev, 3064 self._match_set(self.JOIN_KINDS) and self._prev, 3065 ) 3066 3067 def _parse_join( 3068 self, skip_join_token: bool = False, parse_bracket: bool = False 3069 ) -> t.Optional[exp.Join]: 3070 if self._match(TokenType.COMMA): 3071 return self.expression(exp.Join, this=self._parse_table()) 3072 3073 index = self._index 3074 method, side, kind = self._parse_join_parts() 3075 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3076 join = self._match(TokenType.JOIN) 3077 3078 if not skip_join_token and not join: 3079 self._retreat(index) 3080 kind = None 3081 method = None 3082 side = None 3083 3084 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3085 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3086 3087 if not skip_join_token and not join and not outer_apply and not cross_apply: 3088 return None 3089 3090 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3091 3092 if method: 3093 kwargs["method"] = method.text 3094 if side: 3095 kwargs["side"] = side.text 3096 if kind: 3097 kwargs["kind"] = kind.text 3098 if hint: 3099 kwargs["hint"] = hint 3100 3101 if self._match(TokenType.MATCH_CONDITION): 3102 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3103 3104 if self._match(TokenType.ON): 3105 kwargs["on"] = self._parse_conjunction() 3106 elif self._match(TokenType.USING): 3107 kwargs["using"] = self._parse_wrapped_id_vars() 3108 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3109 kind and kind.token_type == TokenType.CROSS 3110 ): 3111 index = self._index 3112 joins: t.Optional[list] = list(self._parse_joins()) 3113 3114 if joins and self._match(TokenType.ON): 3115 kwargs["on"] = self._parse_conjunction() 3116 elif joins and self._match(TokenType.USING): 3117 kwargs["using"] = self._parse_wrapped_id_vars() 3118 else: 3119 joins = None 3120 self._retreat(index) 3121 3122 kwargs["this"].set("joins", joins if joins else None) 3123 3124 comments = [c for token in (method, side, kind) if token for c in token.comments] 3125 return self.expression(exp.Join, comments=comments, **kwargs) 3126 3127 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3128 this = self._parse_conjunction() 3129 3130 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3131 return this 3132 3133 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3134 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3135 3136 return this 3137 3138 def _parse_index_params(self) -> exp.IndexParameters: 3139 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3140 3141 if self._match(TokenType.L_PAREN, advance=False): 3142 columns = self._parse_wrapped_csv(self._parse_with_operator) 3143 else: 3144 columns = None 3145 3146 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3147 partition_by = self._parse_partition_by() 3148 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3149 tablespace = ( 3150 self._parse_var(any_token=True) 3151 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3152 else None 3153 ) 3154 where = self._parse_where() 3155 3156 return self.expression( 3157 exp.IndexParameters, 3158 using=using, 3159 columns=columns, 3160 include=include, 3161 partition_by=partition_by, 3162 where=where, 3163 with_storage=with_storage, 3164 tablespace=tablespace, 3165 ) 3166 3167 def _parse_index( 3168 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3169 ) -> t.Optional[exp.Index]: 3170 if index or anonymous: 3171 unique = None 3172 primary = None 3173 amp = None 3174 3175 self._match(TokenType.ON) 3176 self._match(TokenType.TABLE) # hive 3177 table = self._parse_table_parts(schema=True) 3178 else: 3179 unique = self._match(TokenType.UNIQUE) 3180 primary = self._match_text_seq("PRIMARY") 3181 amp = self._match_text_seq("AMP") 3182 3183 if not self._match(TokenType.INDEX): 3184 return None 3185 3186 index = self._parse_id_var() 3187 table = None 3188 3189 params = self._parse_index_params() 3190 3191 return self.expression( 3192 exp.Index, 3193 this=index, 3194 table=table, 3195 unique=unique, 3196 primary=primary, 3197 amp=amp, 3198 params=params, 3199 ) 3200 3201 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3202 hints: t.List[exp.Expression] = [] 3203 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3204 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3205 hints.append( 3206 self.expression( 3207 exp.WithTableHint, 3208 expressions=self._parse_csv( 3209 lambda: self._parse_function() or self._parse_var(any_token=True) 3210 ), 3211 ) 3212 ) 3213 self._match_r_paren() 3214 else: 3215 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3216 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3217 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3218 3219 self._match_texts(("INDEX", "KEY")) 3220 if self._match(TokenType.FOR): 3221 hint.set("target", self._advance_any() and self._prev.text.upper()) 3222 3223 hint.set("expressions", self._parse_wrapped_id_vars()) 3224 hints.append(hint) 3225 3226 return hints or None 3227 3228 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3229 return ( 3230 (not schema and self._parse_function(optional_parens=False)) 3231 or self._parse_id_var(any_token=False) 3232 or self._parse_string_as_identifier() 3233 or self._parse_placeholder() 3234 ) 3235 3236 def _parse_table_parts( 3237 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3238 ) -> exp.Table: 3239 catalog = None 3240 db = None 3241 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3242 3243 while self._match(TokenType.DOT): 3244 if catalog: 3245 # This allows nesting the table in arbitrarily many dot expressions if needed 3246 table = self.expression( 3247 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3248 ) 3249 else: 3250 catalog = db 3251 db = table 3252 # "" used for tsql FROM a..b case 3253 table = self._parse_table_part(schema=schema) or "" 3254 3255 if ( 3256 wildcard 3257 and self._is_connected() 3258 and (isinstance(table, exp.Identifier) or not table) 3259 and self._match(TokenType.STAR) 3260 ): 3261 if isinstance(table, exp.Identifier): 3262 table.args["this"] += "*" 3263 else: 3264 table = exp.Identifier(this="*") 3265 3266 # We bubble up comments from the Identifier to the Table 3267 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3268 3269 if is_db_reference: 3270 catalog = db 3271 db = table 3272 table = None 3273 3274 if not table and not is_db_reference: 3275 self.raise_error(f"Expected table name but got {self._curr}") 3276 if not db and is_db_reference: 3277 self.raise_error(f"Expected database name but got {self._curr}") 3278 3279 return self.expression( 3280 exp.Table, 3281 comments=comments, 3282 this=table, 3283 db=db, 3284 catalog=catalog, 3285 pivots=self._parse_pivots(), 3286 ) 3287 3288 def _parse_table( 3289 self, 3290 schema: bool = False, 3291 joins: bool = False, 3292 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3293 parse_bracket: bool = False, 3294 is_db_reference: bool = False, 3295 parse_partition: bool = False, 3296 ) -> t.Optional[exp.Expression]: 3297 lateral = self._parse_lateral() 3298 if lateral: 3299 return lateral 3300 3301 unnest = self._parse_unnest() 3302 if unnest: 3303 return unnest 3304 3305 values = self._parse_derived_table_values() 3306 if values: 3307 return values 3308 3309 subquery = self._parse_select(table=True) 3310 if subquery: 3311 if not subquery.args.get("pivots"): 3312 subquery.set("pivots", self._parse_pivots()) 3313 return subquery 3314 3315 bracket = parse_bracket and self._parse_bracket(None) 3316 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3317 3318 only = self._match(TokenType.ONLY) 3319 3320 this = t.cast( 3321 exp.Expression, 3322 bracket 3323 or self._parse_bracket( 3324 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3325 ), 3326 ) 3327 3328 if only: 3329 this.set("only", only) 3330 3331 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3332 self._match_text_seq("*") 3333 3334 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3335 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3336 this.set("partition", self._parse_partition()) 3337 3338 if schema: 3339 return self._parse_schema(this=this) 3340 3341 version = self._parse_version() 3342 3343 if version: 3344 this.set("version", version) 3345 3346 if self.dialect.ALIAS_POST_TABLESAMPLE: 3347 table_sample = self._parse_table_sample() 3348 3349 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3350 if alias: 3351 this.set("alias", alias) 3352 3353 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3354 return self.expression( 3355 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3356 ) 3357 3358 this.set("hints", self._parse_table_hints()) 3359 3360 if not this.args.get("pivots"): 3361 this.set("pivots", self._parse_pivots()) 3362 3363 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3364 table_sample = self._parse_table_sample() 3365 3366 if table_sample: 3367 table_sample.set("this", this) 3368 this = table_sample 3369 3370 if joins: 3371 for join in self._parse_joins(): 3372 this.append("joins", join) 3373 3374 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3375 this.set("ordinality", True) 3376 this.set("alias", self._parse_table_alias()) 3377 3378 return this 3379 3380 def _parse_version(self) -> t.Optional[exp.Version]: 3381 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3382 this = "TIMESTAMP" 3383 elif self._match(TokenType.VERSION_SNAPSHOT): 3384 this = "VERSION" 3385 else: 3386 return None 3387 3388 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3389 kind = self._prev.text.upper() 3390 start = self._parse_bitwise() 3391 self._match_texts(("TO", "AND")) 3392 end = self._parse_bitwise() 3393 expression: t.Optional[exp.Expression] = self.expression( 3394 exp.Tuple, expressions=[start, end] 3395 ) 3396 elif self._match_text_seq("CONTAINED", "IN"): 3397 kind = "CONTAINED IN" 3398 expression = self.expression( 3399 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3400 ) 3401 elif self._match(TokenType.ALL): 3402 kind = "ALL" 3403 expression = None 3404 else: 3405 self._match_text_seq("AS", "OF") 3406 kind = "AS OF" 3407 expression = self._parse_type() 3408 3409 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3410 3411 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3412 if not self._match(TokenType.UNNEST): 3413 return None 3414 3415 expressions = self._parse_wrapped_csv(self._parse_equality) 3416 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3417 3418 alias = self._parse_table_alias() if with_alias else None 3419 3420 if alias: 3421 if self.dialect.UNNEST_COLUMN_ONLY: 3422 if alias.args.get("columns"): 3423 self.raise_error("Unexpected extra column alias in unnest.") 3424 3425 alias.set("columns", [alias.this]) 3426 alias.set("this", None) 3427 3428 columns = alias.args.get("columns") or [] 3429 if offset and len(expressions) < len(columns): 3430 offset = columns.pop() 3431 3432 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3433 self._match(TokenType.ALIAS) 3434 offset = self._parse_id_var( 3435 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3436 ) or exp.to_identifier("offset") 3437 3438 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3439 3440 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3441 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3442 if not is_derived and not self._match_text_seq("VALUES"): 3443 return None 3444 3445 expressions = self._parse_csv(self._parse_value) 3446 alias = self._parse_table_alias() 3447 3448 if is_derived: 3449 self._match_r_paren() 3450 3451 return self.expression( 3452 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3453 ) 3454 3455 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3456 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3457 as_modifier and self._match_text_seq("USING", "SAMPLE") 3458 ): 3459 return None 3460 3461 bucket_numerator = None 3462 bucket_denominator = None 3463 bucket_field = None 3464 percent = None 3465 size = None 3466 seed = None 3467 3468 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3469 matched_l_paren = self._match(TokenType.L_PAREN) 3470 3471 if self.TABLESAMPLE_CSV: 3472 num = None 3473 expressions = self._parse_csv(self._parse_primary) 3474 else: 3475 expressions = None 3476 num = ( 3477 self._parse_factor() 3478 if self._match(TokenType.NUMBER, advance=False) 3479 else self._parse_primary() or self._parse_placeholder() 3480 ) 3481 3482 if self._match_text_seq("BUCKET"): 3483 bucket_numerator = self._parse_number() 3484 self._match_text_seq("OUT", "OF") 3485 bucket_denominator = bucket_denominator = self._parse_number() 3486 self._match(TokenType.ON) 3487 bucket_field = self._parse_field() 3488 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3489 percent = num 3490 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3491 size = num 3492 else: 3493 percent = num 3494 3495 if matched_l_paren: 3496 self._match_r_paren() 3497 3498 if self._match(TokenType.L_PAREN): 3499 method = self._parse_var(upper=True) 3500 seed = self._match(TokenType.COMMA) and self._parse_number() 3501 self._match_r_paren() 3502 elif self._match_texts(("SEED", "REPEATABLE")): 3503 seed = self._parse_wrapped(self._parse_number) 3504 3505 if not method and self.DEFAULT_SAMPLING_METHOD: 3506 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3507 3508 return self.expression( 3509 exp.TableSample, 3510 expressions=expressions, 3511 method=method, 3512 bucket_numerator=bucket_numerator, 3513 bucket_denominator=bucket_denominator, 3514 bucket_field=bucket_field, 3515 percent=percent, 3516 size=size, 3517 seed=seed, 3518 ) 3519 3520 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3521 return list(iter(self._parse_pivot, None)) or None 3522 3523 def _parse_joins(self) -> t.Iterator[exp.Join]: 3524 return iter(self._parse_join, None) 3525 3526 # https://duckdb.org/docs/sql/statements/pivot 3527 def _parse_simplified_pivot(self) -> exp.Pivot: 3528 def _parse_on() -> t.Optional[exp.Expression]: 3529 this = self._parse_bitwise() 3530 return self._parse_in(this) if self._match(TokenType.IN) else this 3531 3532 this = self._parse_table() 3533 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3534 using = self._match(TokenType.USING) and self._parse_csv( 3535 lambda: self._parse_alias(self._parse_function()) 3536 ) 3537 group = self._parse_group() 3538 return self.expression( 3539 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3540 ) 3541 3542 def _parse_pivot_in(self) -> exp.In: 3543 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3544 this = self._parse_conjunction() 3545 3546 self._match(TokenType.ALIAS) 3547 alias = self._parse_field() 3548 if alias: 3549 return self.expression(exp.PivotAlias, this=this, alias=alias) 3550 3551 return this 3552 3553 value = self._parse_column() 3554 3555 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3556 self.raise_error("Expecting IN (") 3557 3558 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3559 3560 self._match_r_paren() 3561 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3562 3563 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3564 index = self._index 3565 include_nulls = None 3566 3567 if self._match(TokenType.PIVOT): 3568 unpivot = False 3569 elif self._match(TokenType.UNPIVOT): 3570 unpivot = True 3571 3572 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3573 if self._match_text_seq("INCLUDE", "NULLS"): 3574 include_nulls = True 3575 elif self._match_text_seq("EXCLUDE", "NULLS"): 3576 include_nulls = False 3577 else: 3578 return None 3579 3580 expressions = [] 3581 3582 if not self._match(TokenType.L_PAREN): 3583 self._retreat(index) 3584 return None 3585 3586 if unpivot: 3587 expressions = self._parse_csv(self._parse_column) 3588 else: 3589 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3590 3591 if not expressions: 3592 self.raise_error("Failed to parse PIVOT's aggregation list") 3593 3594 if not self._match(TokenType.FOR): 3595 self.raise_error("Expecting FOR") 3596 3597 field = self._parse_pivot_in() 3598 3599 self._match_r_paren() 3600 3601 pivot = self.expression( 3602 exp.Pivot, 3603 expressions=expressions, 3604 field=field, 3605 unpivot=unpivot, 3606 include_nulls=include_nulls, 3607 ) 3608 3609 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3610 pivot.set("alias", self._parse_table_alias()) 3611 3612 if not unpivot: 3613 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3614 3615 columns: t.List[exp.Expression] = [] 3616 for fld in pivot.args["field"].expressions: 3617 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3618 for name in names: 3619 if self.PREFIXED_PIVOT_COLUMNS: 3620 name = f"{name}_{field_name}" if name else field_name 3621 else: 3622 name = f"{field_name}_{name}" if name else field_name 3623 3624 columns.append(exp.to_identifier(name)) 3625 3626 pivot.set("columns", columns) 3627 3628 return pivot 3629 3630 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3631 return [agg.alias for agg in aggregations] 3632 3633 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3634 if not skip_where_token and not self._match(TokenType.PREWHERE): 3635 return None 3636 3637 return self.expression( 3638 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3639 ) 3640 3641 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3642 if not skip_where_token and not self._match(TokenType.WHERE): 3643 return None 3644 3645 return self.expression( 3646 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3647 ) 3648 3649 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3650 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3651 return None 3652 3653 elements: t.Dict[str, t.Any] = defaultdict(list) 3654 3655 if self._match(TokenType.ALL): 3656 elements["all"] = True 3657 elif self._match(TokenType.DISTINCT): 3658 elements["all"] = False 3659 3660 while True: 3661 expressions = self._parse_csv( 3662 lambda: None 3663 if self._match(TokenType.ROLLUP, advance=False) 3664 else self._parse_conjunction() 3665 ) 3666 if expressions: 3667 elements["expressions"].extend(expressions) 3668 3669 grouping_sets = self._parse_grouping_sets() 3670 if grouping_sets: 3671 elements["grouping_sets"].extend(grouping_sets) 3672 3673 rollup = None 3674 cube = None 3675 totals = None 3676 3677 index = self._index 3678 with_ = self._match(TokenType.WITH) 3679 if self._match(TokenType.ROLLUP): 3680 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3681 elements["rollup"].extend(ensure_list(rollup)) 3682 3683 if self._match(TokenType.CUBE): 3684 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3685 elements["cube"].extend(ensure_list(cube)) 3686 3687 if self._match_text_seq("TOTALS"): 3688 totals = True 3689 elements["totals"] = True # type: ignore 3690 3691 if not (grouping_sets or rollup or cube or totals): 3692 if with_: 3693 self._retreat(index) 3694 break 3695 3696 return self.expression(exp.Group, **elements) # type: ignore 3697 3698 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3699 if not self._match(TokenType.GROUPING_SETS): 3700 return None 3701 3702 return self._parse_wrapped_csv(self._parse_grouping_set) 3703 3704 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3705 if self._match(TokenType.L_PAREN): 3706 grouping_set = self._parse_csv(self._parse_column) 3707 self._match_r_paren() 3708 return self.expression(exp.Tuple, expressions=grouping_set) 3709 3710 return self._parse_column() 3711 3712 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3713 if not skip_having_token and not self._match(TokenType.HAVING): 3714 return None 3715 return self.expression(exp.Having, this=self._parse_conjunction()) 3716 3717 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3718 if not self._match(TokenType.QUALIFY): 3719 return None 3720 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3721 3722 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3723 if skip_start_token: 3724 start = None 3725 elif self._match(TokenType.START_WITH): 3726 start = self._parse_conjunction() 3727 else: 3728 return None 3729 3730 self._match(TokenType.CONNECT_BY) 3731 nocycle = self._match_text_seq("NOCYCLE") 3732 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3733 exp.Prior, this=self._parse_bitwise() 3734 ) 3735 connect = self._parse_conjunction() 3736 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3737 3738 if not start and self._match(TokenType.START_WITH): 3739 start = self._parse_conjunction() 3740 3741 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3742 3743 def _parse_name_as_expression(self) -> exp.Alias: 3744 return self.expression( 3745 exp.Alias, 3746 alias=self._parse_id_var(any_token=True), 3747 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3748 ) 3749 3750 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3751 if self._match_text_seq("INTERPOLATE"): 3752 return self._parse_wrapped_csv(self._parse_name_as_expression) 3753 return None 3754 3755 def _parse_order( 3756 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3757 ) -> t.Optional[exp.Expression]: 3758 siblings = None 3759 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3760 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3761 return this 3762 3763 siblings = True 3764 3765 return self.expression( 3766 exp.Order, 3767 this=this, 3768 expressions=self._parse_csv(self._parse_ordered), 3769 interpolate=self._parse_interpolate(), 3770 siblings=siblings, 3771 ) 3772 3773 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3774 if not self._match(token): 3775 return None 3776 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3777 3778 def _parse_ordered( 3779 self, parse_method: t.Optional[t.Callable] = None 3780 ) -> t.Optional[exp.Ordered]: 3781 this = parse_method() if parse_method else self._parse_conjunction() 3782 if not this: 3783 return None 3784 3785 asc = self._match(TokenType.ASC) 3786 desc = self._match(TokenType.DESC) or (asc and False) 3787 3788 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3789 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3790 3791 nulls_first = is_nulls_first or False 3792 explicitly_null_ordered = is_nulls_first or is_nulls_last 3793 3794 if ( 3795 not explicitly_null_ordered 3796 and ( 3797 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3798 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3799 ) 3800 and self.dialect.NULL_ORDERING != "nulls_are_last" 3801 ): 3802 nulls_first = True 3803 3804 if self._match_text_seq("WITH", "FILL"): 3805 with_fill = self.expression( 3806 exp.WithFill, 3807 **{ # type: ignore 3808 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3809 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3810 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3811 }, 3812 ) 3813 else: 3814 with_fill = None 3815 3816 return self.expression( 3817 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3818 ) 3819 3820 def _parse_limit( 3821 self, 3822 this: t.Optional[exp.Expression] = None, 3823 top: bool = False, 3824 skip_limit_token: bool = False, 3825 ) -> t.Optional[exp.Expression]: 3826 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3827 comments = self._prev_comments 3828 if top: 3829 limit_paren = self._match(TokenType.L_PAREN) 3830 expression = self._parse_term() if limit_paren else self._parse_number() 3831 3832 if limit_paren: 3833 self._match_r_paren() 3834 else: 3835 expression = self._parse_term() 3836 3837 if self._match(TokenType.COMMA): 3838 offset = expression 3839 expression = self._parse_term() 3840 else: 3841 offset = None 3842 3843 limit_exp = self.expression( 3844 exp.Limit, 3845 this=this, 3846 expression=expression, 3847 offset=offset, 3848 comments=comments, 3849 expressions=self._parse_limit_by(), 3850 ) 3851 3852 return limit_exp 3853 3854 if self._match(TokenType.FETCH): 3855 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3856 direction = self._prev.text.upper() if direction else "FIRST" 3857 3858 count = self._parse_field(tokens=self.FETCH_TOKENS) 3859 percent = self._match(TokenType.PERCENT) 3860 3861 self._match_set((TokenType.ROW, TokenType.ROWS)) 3862 3863 only = self._match_text_seq("ONLY") 3864 with_ties = self._match_text_seq("WITH", "TIES") 3865 3866 if only and with_ties: 3867 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3868 3869 return self.expression( 3870 exp.Fetch, 3871 direction=direction, 3872 count=count, 3873 percent=percent, 3874 with_ties=with_ties, 3875 ) 3876 3877 return this 3878 3879 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3880 if not self._match(TokenType.OFFSET): 3881 return this 3882 3883 count = self._parse_term() 3884 self._match_set((TokenType.ROW, TokenType.ROWS)) 3885 3886 return self.expression( 3887 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3888 ) 3889 3890 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3891 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3892 3893 def _parse_locks(self) -> t.List[exp.Lock]: 3894 locks = [] 3895 while True: 3896 if self._match_text_seq("FOR", "UPDATE"): 3897 update = True 3898 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3899 "LOCK", "IN", "SHARE", "MODE" 3900 ): 3901 update = False 3902 else: 3903 break 3904 3905 expressions = None 3906 if self._match_text_seq("OF"): 3907 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3908 3909 wait: t.Optional[bool | exp.Expression] = None 3910 if self._match_text_seq("NOWAIT"): 3911 wait = True 3912 elif self._match_text_seq("WAIT"): 3913 wait = self._parse_primary() 3914 elif self._match_text_seq("SKIP", "LOCKED"): 3915 wait = False 3916 3917 locks.append( 3918 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3919 ) 3920 3921 return locks 3922 3923 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3924 while this and self._match_set(self.SET_OPERATIONS): 3925 token_type = self._prev.token_type 3926 3927 if token_type == TokenType.UNION: 3928 operation = exp.Union 3929 elif token_type == TokenType.EXCEPT: 3930 operation = exp.Except 3931 else: 3932 operation = exp.Intersect 3933 3934 comments = self._prev.comments 3935 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3936 by_name = self._match_text_seq("BY", "NAME") 3937 expression = self._parse_select(nested=True, parse_set_operation=False) 3938 3939 this = self.expression( 3940 operation, 3941 comments=comments, 3942 this=this, 3943 distinct=distinct, 3944 by_name=by_name, 3945 expression=expression, 3946 ) 3947 3948 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3949 expression = this.expression 3950 3951 if expression: 3952 for arg in self.UNION_MODIFIERS: 3953 expr = expression.args.get(arg) 3954 if expr: 3955 this.set(arg, expr.pop()) 3956 3957 return this 3958 3959 def _parse_expression(self) -> t.Optional[exp.Expression]: 3960 return self._parse_alias(self._parse_conjunction()) 3961 3962 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3963 this = self._parse_equality() 3964 3965 if self._match(TokenType.COLON_EQ): 3966 this = self.expression( 3967 exp.PropertyEQ, 3968 this=this, 3969 comments=self._prev_comments, 3970 expression=self._parse_conjunction(), 3971 ) 3972 3973 while self._match_set(self.CONJUNCTION): 3974 this = self.expression( 3975 self.CONJUNCTION[self._prev.token_type], 3976 this=this, 3977 comments=self._prev_comments, 3978 expression=self._parse_equality(), 3979 ) 3980 return this 3981 3982 def _parse_equality(self) -> t.Optional[exp.Expression]: 3983 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3984 3985 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3986 return self._parse_tokens(self._parse_range, self.COMPARISON) 3987 3988 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3989 this = this or self._parse_bitwise() 3990 negate = self._match(TokenType.NOT) 3991 3992 if self._match_set(self.RANGE_PARSERS): 3993 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3994 if not expression: 3995 return this 3996 3997 this = expression 3998 elif self._match(TokenType.ISNULL): 3999 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4000 4001 # Postgres supports ISNULL and NOTNULL for conditions. 4002 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4003 if self._match(TokenType.NOTNULL): 4004 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4005 this = self.expression(exp.Not, this=this) 4006 4007 if negate: 4008 this = self.expression(exp.Not, this=this) 4009 4010 if self._match(TokenType.IS): 4011 this = self._parse_is(this) 4012 4013 return this 4014 4015 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4016 index = self._index - 1 4017 negate = self._match(TokenType.NOT) 4018 4019 if self._match_text_seq("DISTINCT", "FROM"): 4020 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4021 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4022 4023 expression = self._parse_null() or self._parse_boolean() 4024 if not expression: 4025 self._retreat(index) 4026 return None 4027 4028 this = self.expression(exp.Is, this=this, expression=expression) 4029 return self.expression(exp.Not, this=this) if negate else this 4030 4031 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4032 unnest = self._parse_unnest(with_alias=False) 4033 if unnest: 4034 this = self.expression(exp.In, this=this, unnest=unnest) 4035 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4036 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4037 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4038 4039 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4040 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4041 else: 4042 this = self.expression(exp.In, this=this, expressions=expressions) 4043 4044 if matched_l_paren: 4045 self._match_r_paren(this) 4046 elif not self._match(TokenType.R_BRACKET, expression=this): 4047 self.raise_error("Expecting ]") 4048 else: 4049 this = self.expression(exp.In, this=this, field=self._parse_field()) 4050 4051 return this 4052 4053 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4054 low = self._parse_bitwise() 4055 self._match(TokenType.AND) 4056 high = self._parse_bitwise() 4057 return self.expression(exp.Between, this=this, low=low, high=high) 4058 4059 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4060 if not self._match(TokenType.ESCAPE): 4061 return this 4062 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4063 4064 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 4065 index = self._index 4066 4067 if not self._match(TokenType.INTERVAL) and match_interval: 4068 return None 4069 4070 if self._match(TokenType.STRING, advance=False): 4071 this = self._parse_primary() 4072 else: 4073 this = self._parse_term() 4074 4075 if not this or ( 4076 isinstance(this, exp.Column) 4077 and not this.table 4078 and not this.this.quoted 4079 and this.name.upper() == "IS" 4080 ): 4081 self._retreat(index) 4082 return None 4083 4084 unit = self._parse_function() or ( 4085 not self._match(TokenType.ALIAS, advance=False) 4086 and self._parse_var(any_token=True, upper=True) 4087 ) 4088 4089 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4090 # each INTERVAL expression into this canonical form so it's easy to transpile 4091 if this and this.is_number: 4092 this = exp.Literal.string(this.name) 4093 elif this and this.is_string: 4094 parts = this.name.split() 4095 4096 if len(parts) == 2: 4097 if unit: 4098 # This is not actually a unit, it's something else (e.g. a "window side") 4099 unit = None 4100 self._retreat(self._index - 1) 4101 4102 this = exp.Literal.string(parts[0]) 4103 unit = self.expression(exp.Var, this=parts[1].upper()) 4104 4105 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4106 unit = self.expression( 4107 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4108 ) 4109 4110 return self.expression(exp.Interval, this=this, unit=unit) 4111 4112 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4113 this = self._parse_term() 4114 4115 while True: 4116 if self._match_set(self.BITWISE): 4117 this = self.expression( 4118 self.BITWISE[self._prev.token_type], 4119 this=this, 4120 expression=self._parse_term(), 4121 ) 4122 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4123 this = self.expression( 4124 exp.DPipe, 4125 this=this, 4126 expression=self._parse_term(), 4127 safe=not self.dialect.STRICT_STRING_CONCAT, 4128 ) 4129 elif self._match(TokenType.DQMARK): 4130 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4131 elif self._match_pair(TokenType.LT, TokenType.LT): 4132 this = self.expression( 4133 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4134 ) 4135 elif self._match_pair(TokenType.GT, TokenType.GT): 4136 this = self.expression( 4137 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4138 ) 4139 else: 4140 break 4141 4142 return this 4143 4144 def _parse_term(self) -> t.Optional[exp.Expression]: 4145 return self._parse_tokens(self._parse_factor, self.TERM) 4146 4147 def _parse_factor(self) -> t.Optional[exp.Expression]: 4148 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4149 this = parse_method() 4150 4151 while self._match_set(self.FACTOR): 4152 this = self.expression( 4153 self.FACTOR[self._prev.token_type], 4154 this=this, 4155 comments=self._prev_comments, 4156 expression=parse_method(), 4157 ) 4158 if isinstance(this, exp.Div): 4159 this.args["typed"] = self.dialect.TYPED_DIVISION 4160 this.args["safe"] = self.dialect.SAFE_DIVISION 4161 4162 return this 4163 4164 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4165 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4166 4167 def _parse_unary(self) -> t.Optional[exp.Expression]: 4168 if self._match_set(self.UNARY_PARSERS): 4169 return self.UNARY_PARSERS[self._prev.token_type](self) 4170 return self._parse_at_time_zone(self._parse_type()) 4171 4172 def _parse_type( 4173 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4174 ) -> t.Optional[exp.Expression]: 4175 interval = parse_interval and self._parse_interval() 4176 if interval: 4177 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4178 while True: 4179 index = self._index 4180 self._match(TokenType.PLUS) 4181 4182 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4183 self._retreat(index) 4184 break 4185 4186 interval = self.expression( # type: ignore 4187 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4188 ) 4189 4190 return interval 4191 4192 index = self._index 4193 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4194 this = self._parse_column() 4195 4196 if data_type: 4197 if isinstance(this, exp.Literal): 4198 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4199 if parser: 4200 return parser(self, this, data_type) 4201 return self.expression(exp.Cast, this=this, to=data_type) 4202 4203 if not data_type.expressions: 4204 self._retreat(index) 4205 return self._parse_id_var() if fallback_to_identifier else self._parse_column() 4206 4207 return self._parse_column_ops(data_type) 4208 4209 return this and self._parse_column_ops(this) 4210 4211 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4212 this = self._parse_type() 4213 if not this: 4214 return None 4215 4216 if isinstance(this, exp.Column) and not this.table: 4217 this = exp.var(this.name.upper()) 4218 4219 return self.expression( 4220 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4221 ) 4222 4223 def _parse_types( 4224 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4225 ) -> t.Optional[exp.Expression]: 4226 index = self._index 4227 4228 this: t.Optional[exp.Expression] = None 4229 prefix = self._match_text_seq("SYSUDTLIB", ".") 4230 4231 if not self._match_set(self.TYPE_TOKENS): 4232 identifier = allow_identifiers and self._parse_id_var( 4233 any_token=False, tokens=(TokenType.VAR,) 4234 ) 4235 if identifier: 4236 tokens = self.dialect.tokenize(identifier.name) 4237 4238 if len(tokens) != 1: 4239 self.raise_error("Unexpected identifier", self._prev) 4240 4241 if tokens[0].token_type in self.TYPE_TOKENS: 4242 self._prev = tokens[0] 4243 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4244 type_name = identifier.name 4245 4246 while self._match(TokenType.DOT): 4247 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4248 4249 this = exp.DataType.build(type_name, udt=True) 4250 else: 4251 self._retreat(self._index - 1) 4252 return None 4253 else: 4254 return None 4255 4256 type_token = self._prev.token_type 4257 4258 if type_token == TokenType.PSEUDO_TYPE: 4259 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4260 4261 if type_token == TokenType.OBJECT_IDENTIFIER: 4262 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4263 4264 nested = type_token in self.NESTED_TYPE_TOKENS 4265 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4266 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4267 expressions = None 4268 maybe_func = False 4269 4270 if self._match(TokenType.L_PAREN): 4271 if is_struct: 4272 expressions = self._parse_csv(self._parse_struct_types) 4273 elif nested: 4274 expressions = self._parse_csv( 4275 lambda: self._parse_types( 4276 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4277 ) 4278 ) 4279 elif type_token in self.ENUM_TYPE_TOKENS: 4280 expressions = self._parse_csv(self._parse_equality) 4281 elif is_aggregate: 4282 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4283 any_token=False, tokens=(TokenType.VAR,) 4284 ) 4285 if not func_or_ident or not self._match(TokenType.COMMA): 4286 return None 4287 expressions = self._parse_csv( 4288 lambda: self._parse_types( 4289 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4290 ) 4291 ) 4292 expressions.insert(0, func_or_ident) 4293 else: 4294 expressions = self._parse_csv(self._parse_type_size) 4295 4296 if not expressions or not self._match(TokenType.R_PAREN): 4297 self._retreat(index) 4298 return None 4299 4300 maybe_func = True 4301 4302 values: t.Optional[t.List[exp.Expression]] = None 4303 4304 if nested and self._match(TokenType.LT): 4305 if is_struct: 4306 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4307 else: 4308 expressions = self._parse_csv( 4309 lambda: self._parse_types( 4310 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4311 ) 4312 ) 4313 4314 if not self._match(TokenType.GT): 4315 self.raise_error("Expecting >") 4316 4317 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4318 values = self._parse_csv(self._parse_conjunction) 4319 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4320 4321 if type_token in self.TIMESTAMPS: 4322 if self._match_text_seq("WITH", "TIME", "ZONE"): 4323 maybe_func = False 4324 tz_type = ( 4325 exp.DataType.Type.TIMETZ 4326 if type_token in self.TIMES 4327 else exp.DataType.Type.TIMESTAMPTZ 4328 ) 4329 this = exp.DataType(this=tz_type, expressions=expressions) 4330 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4331 maybe_func = False 4332 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4333 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4334 maybe_func = False 4335 elif type_token == TokenType.INTERVAL: 4336 unit = self._parse_var(upper=True) 4337 if unit: 4338 if self._match_text_seq("TO"): 4339 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4340 4341 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4342 else: 4343 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4344 4345 if maybe_func and check_func: 4346 index2 = self._index 4347 peek = self._parse_string() 4348 4349 if not peek: 4350 self._retreat(index) 4351 return None 4352 4353 self._retreat(index2) 4354 4355 if not this: 4356 if self._match_text_seq("UNSIGNED"): 4357 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4358 if not unsigned_type_token: 4359 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4360 4361 type_token = unsigned_type_token or type_token 4362 4363 this = exp.DataType( 4364 this=exp.DataType.Type[type_token.value], 4365 expressions=expressions, 4366 nested=nested, 4367 values=values, 4368 prefix=prefix, 4369 ) 4370 elif expressions: 4371 this.set("expressions", expressions) 4372 4373 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 4374 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 4375 4376 if self.TYPE_CONVERTER and isinstance(this.this, exp.DataType.Type): 4377 converter = self.TYPE_CONVERTER.get(this.this) 4378 if converter: 4379 this = converter(t.cast(exp.DataType, this)) 4380 4381 return this 4382 4383 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4384 index = self._index 4385 this = ( 4386 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4387 or self._parse_id_var() 4388 ) 4389 self._match(TokenType.COLON) 4390 column_def = self._parse_column_def(this) 4391 4392 if type_required and ( 4393 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 4394 ): 4395 self._retreat(index) 4396 return self._parse_types() 4397 4398 return column_def 4399 4400 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4401 if not self._match_text_seq("AT", "TIME", "ZONE"): 4402 return this 4403 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4404 4405 def _parse_column(self) -> t.Optional[exp.Expression]: 4406 this = self._parse_column_reference() 4407 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4408 4409 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4410 this = self._parse_field() 4411 if ( 4412 not this 4413 and self._match(TokenType.VALUES, advance=False) 4414 and self.VALUES_FOLLOWED_BY_PAREN 4415 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4416 ): 4417 this = self._parse_id_var() 4418 4419 if isinstance(this, exp.Identifier): 4420 # We bubble up comments from the Identifier to the Column 4421 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4422 4423 return this 4424 4425 def _parse_colon_as_json_extract( 4426 self, this: t.Optional[exp.Expression] 4427 ) -> t.Optional[exp.Expression]: 4428 casts = [] 4429 json_path = [] 4430 4431 while self._match(TokenType.COLON): 4432 start_index = self._index 4433 path = self._parse_column_ops(self._parse_field(any_token=True)) 4434 4435 # The cast :: operator has a lower precedence than the extraction operator :, so 4436 # we rearrange the AST appropriately to avoid casting the JSON path 4437 while isinstance(path, exp.Cast): 4438 casts.append(path.to) 4439 path = path.this 4440 4441 if casts: 4442 dcolon_offset = next( 4443 i 4444 for i, t in enumerate(self._tokens[start_index:]) 4445 if t.token_type == TokenType.DCOLON 4446 ) 4447 end_token = self._tokens[start_index + dcolon_offset - 1] 4448 else: 4449 end_token = self._prev 4450 4451 if path: 4452 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4453 4454 if json_path: 4455 this = self.expression( 4456 exp.JSONExtract, 4457 this=this, 4458 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4459 ) 4460 4461 while casts: 4462 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4463 4464 return this 4465 4466 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4467 this = self._parse_bracket(this) 4468 4469 while self._match_set(self.COLUMN_OPERATORS): 4470 op_token = self._prev.token_type 4471 op = self.COLUMN_OPERATORS.get(op_token) 4472 4473 if op_token == TokenType.DCOLON: 4474 field = self._parse_types() 4475 if not field: 4476 self.raise_error("Expected type") 4477 elif op and self._curr: 4478 field = self._parse_column_reference() 4479 else: 4480 field = self._parse_field(any_token=True, anonymous_func=True) 4481 4482 if isinstance(field, exp.Func) and this: 4483 # bigquery allows function calls like x.y.count(...) 4484 # SAFE.SUBSTR(...) 4485 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4486 this = exp.replace_tree( 4487 this, 4488 lambda n: ( 4489 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4490 if n.table 4491 else n.this 4492 ) 4493 if isinstance(n, exp.Column) 4494 else n, 4495 ) 4496 4497 if op: 4498 this = op(self, this, field) 4499 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4500 this = self.expression( 4501 exp.Column, 4502 this=field, 4503 table=this.this, 4504 db=this.args.get("table"), 4505 catalog=this.args.get("db"), 4506 ) 4507 else: 4508 this = self.expression(exp.Dot, this=this, expression=field) 4509 4510 this = self._parse_bracket(this) 4511 4512 return self._parse_colon_as_json_extract(this) if self.COLON_IS_JSON_EXTRACT else this 4513 4514 def _parse_primary(self) -> t.Optional[exp.Expression]: 4515 if self._match_set(self.PRIMARY_PARSERS): 4516 token_type = self._prev.token_type 4517 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4518 4519 if token_type == TokenType.STRING: 4520 expressions = [primary] 4521 while self._match(TokenType.STRING): 4522 expressions.append(exp.Literal.string(self._prev.text)) 4523 4524 if len(expressions) > 1: 4525 return self.expression(exp.Concat, expressions=expressions) 4526 4527 return primary 4528 4529 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4530 return exp.Literal.number(f"0.{self._prev.text}") 4531 4532 if self._match(TokenType.L_PAREN): 4533 comments = self._prev_comments 4534 query = self._parse_select() 4535 4536 if query: 4537 expressions = [query] 4538 else: 4539 expressions = self._parse_expressions() 4540 4541 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4542 4543 if not this and self._match(TokenType.R_PAREN, advance=False): 4544 this = self.expression(exp.Tuple) 4545 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4546 this = self._parse_subquery(this=this, parse_alias=False) 4547 elif isinstance(this, exp.Subquery): 4548 this = self._parse_subquery( 4549 this=self._parse_set_operations(this), parse_alias=False 4550 ) 4551 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4552 this = self.expression(exp.Tuple, expressions=expressions) 4553 else: 4554 this = self.expression(exp.Paren, this=this) 4555 4556 if this: 4557 this.add_comments(comments) 4558 4559 self._match_r_paren(expression=this) 4560 return this 4561 4562 return None 4563 4564 def _parse_field( 4565 self, 4566 any_token: bool = False, 4567 tokens: t.Optional[t.Collection[TokenType]] = None, 4568 anonymous_func: bool = False, 4569 ) -> t.Optional[exp.Expression]: 4570 if anonymous_func: 4571 field = ( 4572 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4573 or self._parse_primary() 4574 ) 4575 else: 4576 field = self._parse_primary() or self._parse_function( 4577 anonymous=anonymous_func, any_token=any_token 4578 ) 4579 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4580 4581 def _parse_function( 4582 self, 4583 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4584 anonymous: bool = False, 4585 optional_parens: bool = True, 4586 any_token: bool = False, 4587 ) -> t.Optional[exp.Expression]: 4588 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4589 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4590 fn_syntax = False 4591 if ( 4592 self._match(TokenType.L_BRACE, advance=False) 4593 and self._next 4594 and self._next.text.upper() == "FN" 4595 ): 4596 self._advance(2) 4597 fn_syntax = True 4598 4599 func = self._parse_function_call( 4600 functions=functions, 4601 anonymous=anonymous, 4602 optional_parens=optional_parens, 4603 any_token=any_token, 4604 ) 4605 4606 if fn_syntax: 4607 self._match(TokenType.R_BRACE) 4608 4609 return func 4610 4611 def _parse_function_call( 4612 self, 4613 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4614 anonymous: bool = False, 4615 optional_parens: bool = True, 4616 any_token: bool = False, 4617 ) -> t.Optional[exp.Expression]: 4618 if not self._curr: 4619 return None 4620 4621 comments = self._curr.comments 4622 token_type = self._curr.token_type 4623 this = self._curr.text 4624 upper = this.upper() 4625 4626 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4627 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4628 self._advance() 4629 return self._parse_window(parser(self)) 4630 4631 if not self._next or self._next.token_type != TokenType.L_PAREN: 4632 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4633 self._advance() 4634 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4635 4636 return None 4637 4638 if any_token: 4639 if token_type in self.RESERVED_TOKENS: 4640 return None 4641 elif token_type not in self.FUNC_TOKENS: 4642 return None 4643 4644 self._advance(2) 4645 4646 parser = self.FUNCTION_PARSERS.get(upper) 4647 if parser and not anonymous: 4648 this = parser(self) 4649 else: 4650 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4651 4652 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4653 this = self.expression(subquery_predicate, this=self._parse_select()) 4654 self._match_r_paren() 4655 return this 4656 4657 if functions is None: 4658 functions = self.FUNCTIONS 4659 4660 function = functions.get(upper) 4661 4662 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4663 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4664 4665 if alias: 4666 args = self._kv_to_prop_eq(args) 4667 4668 if function and not anonymous: 4669 if "dialect" in function.__code__.co_varnames: 4670 func = function(args, dialect=self.dialect) 4671 else: 4672 func = function(args) 4673 4674 func = self.validate_expression(func, args) 4675 if not self.dialect.NORMALIZE_FUNCTIONS: 4676 func.meta["name"] = this 4677 4678 this = func 4679 else: 4680 if token_type == TokenType.IDENTIFIER: 4681 this = exp.Identifier(this=this, quoted=True) 4682 this = self.expression(exp.Anonymous, this=this, expressions=args) 4683 4684 if isinstance(this, exp.Expression): 4685 this.add_comments(comments) 4686 4687 self._match_r_paren(this) 4688 return self._parse_window(this) 4689 4690 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4691 transformed = [] 4692 4693 for e in expressions: 4694 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4695 if isinstance(e, exp.Alias): 4696 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4697 4698 if not isinstance(e, exp.PropertyEQ): 4699 e = self.expression( 4700 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4701 ) 4702 4703 if isinstance(e.this, exp.Column): 4704 e.this.replace(e.this.this) 4705 4706 transformed.append(e) 4707 4708 return transformed 4709 4710 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4711 return self._parse_column_def(self._parse_id_var()) 4712 4713 def _parse_user_defined_function( 4714 self, kind: t.Optional[TokenType] = None 4715 ) -> t.Optional[exp.Expression]: 4716 this = self._parse_id_var() 4717 4718 while self._match(TokenType.DOT): 4719 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4720 4721 if not self._match(TokenType.L_PAREN): 4722 return this 4723 4724 expressions = self._parse_csv(self._parse_function_parameter) 4725 self._match_r_paren() 4726 return self.expression( 4727 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4728 ) 4729 4730 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4731 literal = self._parse_primary() 4732 if literal: 4733 return self.expression(exp.Introducer, this=token.text, expression=literal) 4734 4735 return self.expression(exp.Identifier, this=token.text) 4736 4737 def _parse_session_parameter(self) -> exp.SessionParameter: 4738 kind = None 4739 this = self._parse_id_var() or self._parse_primary() 4740 4741 if this and self._match(TokenType.DOT): 4742 kind = this.name 4743 this = self._parse_var() or self._parse_primary() 4744 4745 return self.expression(exp.SessionParameter, this=this, kind=kind) 4746 4747 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 4748 return self._parse_id_var() 4749 4750 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4751 index = self._index 4752 4753 if self._match(TokenType.L_PAREN): 4754 expressions = t.cast( 4755 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 4756 ) 4757 4758 if not self._match(TokenType.R_PAREN): 4759 self._retreat(index) 4760 else: 4761 expressions = [self._parse_lambda_arg()] 4762 4763 if self._match_set(self.LAMBDAS): 4764 return self.LAMBDAS[self._prev.token_type](self, expressions) 4765 4766 self._retreat(index) 4767 4768 this: t.Optional[exp.Expression] 4769 4770 if self._match(TokenType.DISTINCT): 4771 this = self.expression( 4772 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4773 ) 4774 else: 4775 this = self._parse_select_or_expression(alias=alias) 4776 4777 return self._parse_limit( 4778 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4779 ) 4780 4781 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4782 index = self._index 4783 if not self._match(TokenType.L_PAREN): 4784 return this 4785 4786 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 4787 # expr can be of both types 4788 if self._match_set(self.SELECT_START_TOKENS): 4789 self._retreat(index) 4790 return this 4791 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4792 self._match_r_paren() 4793 return self.expression(exp.Schema, this=this, expressions=args) 4794 4795 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4796 return self._parse_column_def(self._parse_field(any_token=True)) 4797 4798 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4799 # column defs are not really columns, they're identifiers 4800 if isinstance(this, exp.Column): 4801 this = this.this 4802 4803 kind = self._parse_types(schema=True) 4804 4805 if self._match_text_seq("FOR", "ORDINALITY"): 4806 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4807 4808 constraints: t.List[exp.Expression] = [] 4809 4810 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4811 ("ALIAS", "MATERIALIZED") 4812 ): 4813 persisted = self._prev.text.upper() == "MATERIALIZED" 4814 constraints.append( 4815 self.expression( 4816 exp.ComputedColumnConstraint, 4817 this=self._parse_conjunction(), 4818 persisted=persisted or self._match_text_seq("PERSISTED"), 4819 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4820 ) 4821 ) 4822 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4823 self._match(TokenType.ALIAS) 4824 constraints.append( 4825 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4826 ) 4827 4828 while True: 4829 constraint = self._parse_column_constraint() 4830 if not constraint: 4831 break 4832 constraints.append(constraint) 4833 4834 if not kind and not constraints: 4835 return this 4836 4837 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4838 4839 def _parse_auto_increment( 4840 self, 4841 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4842 start = None 4843 increment = None 4844 4845 if self._match(TokenType.L_PAREN, advance=False): 4846 args = self._parse_wrapped_csv(self._parse_bitwise) 4847 start = seq_get(args, 0) 4848 increment = seq_get(args, 1) 4849 elif self._match_text_seq("START"): 4850 start = self._parse_bitwise() 4851 self._match_text_seq("INCREMENT") 4852 increment = self._parse_bitwise() 4853 4854 if start and increment: 4855 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4856 4857 return exp.AutoIncrementColumnConstraint() 4858 4859 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4860 if not self._match_text_seq("REFRESH"): 4861 self._retreat(self._index - 1) 4862 return None 4863 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4864 4865 def _parse_compress(self) -> exp.CompressColumnConstraint: 4866 if self._match(TokenType.L_PAREN, advance=False): 4867 return self.expression( 4868 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4869 ) 4870 4871 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4872 4873 def _parse_generated_as_identity( 4874 self, 4875 ) -> ( 4876 exp.GeneratedAsIdentityColumnConstraint 4877 | exp.ComputedColumnConstraint 4878 | exp.GeneratedAsRowColumnConstraint 4879 ): 4880 if self._match_text_seq("BY", "DEFAULT"): 4881 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4882 this = self.expression( 4883 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4884 ) 4885 else: 4886 self._match_text_seq("ALWAYS") 4887 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4888 4889 self._match(TokenType.ALIAS) 4890 4891 if self._match_text_seq("ROW"): 4892 start = self._match_text_seq("START") 4893 if not start: 4894 self._match(TokenType.END) 4895 hidden = self._match_text_seq("HIDDEN") 4896 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4897 4898 identity = self._match_text_seq("IDENTITY") 4899 4900 if self._match(TokenType.L_PAREN): 4901 if self._match(TokenType.START_WITH): 4902 this.set("start", self._parse_bitwise()) 4903 if self._match_text_seq("INCREMENT", "BY"): 4904 this.set("increment", self._parse_bitwise()) 4905 if self._match_text_seq("MINVALUE"): 4906 this.set("minvalue", self._parse_bitwise()) 4907 if self._match_text_seq("MAXVALUE"): 4908 this.set("maxvalue", self._parse_bitwise()) 4909 4910 if self._match_text_seq("CYCLE"): 4911 this.set("cycle", True) 4912 elif self._match_text_seq("NO", "CYCLE"): 4913 this.set("cycle", False) 4914 4915 if not identity: 4916 this.set("expression", self._parse_range()) 4917 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4918 args = self._parse_csv(self._parse_bitwise) 4919 this.set("start", seq_get(args, 0)) 4920 this.set("increment", seq_get(args, 1)) 4921 4922 self._match_r_paren() 4923 4924 return this 4925 4926 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4927 self._match_text_seq("LENGTH") 4928 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4929 4930 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4931 if self._match_text_seq("NULL"): 4932 return self.expression(exp.NotNullColumnConstraint) 4933 if self._match_text_seq("CASESPECIFIC"): 4934 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4935 if self._match_text_seq("FOR", "REPLICATION"): 4936 return self.expression(exp.NotForReplicationColumnConstraint) 4937 return None 4938 4939 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4940 if self._match(TokenType.CONSTRAINT): 4941 this = self._parse_id_var() 4942 else: 4943 this = None 4944 4945 if self._match_texts(self.CONSTRAINT_PARSERS): 4946 return self.expression( 4947 exp.ColumnConstraint, 4948 this=this, 4949 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4950 ) 4951 4952 return this 4953 4954 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4955 if not self._match(TokenType.CONSTRAINT): 4956 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4957 4958 return self.expression( 4959 exp.Constraint, 4960 this=self._parse_id_var(), 4961 expressions=self._parse_unnamed_constraints(), 4962 ) 4963 4964 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 4965 constraints = [] 4966 while True: 4967 constraint = self._parse_unnamed_constraint() or self._parse_function() 4968 if not constraint: 4969 break 4970 constraints.append(constraint) 4971 4972 return constraints 4973 4974 def _parse_unnamed_constraint( 4975 self, constraints: t.Optional[t.Collection[str]] = None 4976 ) -> t.Optional[exp.Expression]: 4977 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4978 constraints or self.CONSTRAINT_PARSERS 4979 ): 4980 return None 4981 4982 constraint = self._prev.text.upper() 4983 if constraint not in self.CONSTRAINT_PARSERS: 4984 self.raise_error(f"No parser found for schema constraint {constraint}.") 4985 4986 return self.CONSTRAINT_PARSERS[constraint](self) 4987 4988 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4989 self._match_text_seq("KEY") 4990 return self.expression( 4991 exp.UniqueColumnConstraint, 4992 this=self._parse_schema(self._parse_id_var(any_token=False)), 4993 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4994 on_conflict=self._parse_on_conflict(), 4995 ) 4996 4997 def _parse_key_constraint_options(self) -> t.List[str]: 4998 options = [] 4999 while True: 5000 if not self._curr: 5001 break 5002 5003 if self._match(TokenType.ON): 5004 action = None 5005 on = self._advance_any() and self._prev.text 5006 5007 if self._match_text_seq("NO", "ACTION"): 5008 action = "NO ACTION" 5009 elif self._match_text_seq("CASCADE"): 5010 action = "CASCADE" 5011 elif self._match_text_seq("RESTRICT"): 5012 action = "RESTRICT" 5013 elif self._match_pair(TokenType.SET, TokenType.NULL): 5014 action = "SET NULL" 5015 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5016 action = "SET DEFAULT" 5017 else: 5018 self.raise_error("Invalid key constraint") 5019 5020 options.append(f"ON {on} {action}") 5021 elif self._match_text_seq("NOT", "ENFORCED"): 5022 options.append("NOT ENFORCED") 5023 elif self._match_text_seq("DEFERRABLE"): 5024 options.append("DEFERRABLE") 5025 elif self._match_text_seq("INITIALLY", "DEFERRED"): 5026 options.append("INITIALLY DEFERRED") 5027 elif self._match_text_seq("NORELY"): 5028 options.append("NORELY") 5029 elif self._match_text_seq("MATCH", "FULL"): 5030 options.append("MATCH FULL") 5031 else: 5032 break 5033 5034 return options 5035 5036 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5037 if match and not self._match(TokenType.REFERENCES): 5038 return None 5039 5040 expressions = None 5041 this = self._parse_table(schema=True) 5042 options = self._parse_key_constraint_options() 5043 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5044 5045 def _parse_foreign_key(self) -> exp.ForeignKey: 5046 expressions = self._parse_wrapped_id_vars() 5047 reference = self._parse_references() 5048 options = {} 5049 5050 while self._match(TokenType.ON): 5051 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5052 self.raise_error("Expected DELETE or UPDATE") 5053 5054 kind = self._prev.text.lower() 5055 5056 if self._match_text_seq("NO", "ACTION"): 5057 action = "NO ACTION" 5058 elif self._match(TokenType.SET): 5059 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5060 action = "SET " + self._prev.text.upper() 5061 else: 5062 self._advance() 5063 action = self._prev.text.upper() 5064 5065 options[kind] = action 5066 5067 return self.expression( 5068 exp.ForeignKey, 5069 expressions=expressions, 5070 reference=reference, 5071 **options, # type: ignore 5072 ) 5073 5074 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5075 return self._parse_field() 5076 5077 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5078 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5079 self._retreat(self._index - 1) 5080 return None 5081 5082 id_vars = self._parse_wrapped_id_vars() 5083 return self.expression( 5084 exp.PeriodForSystemTimeConstraint, 5085 this=seq_get(id_vars, 0), 5086 expression=seq_get(id_vars, 1), 5087 ) 5088 5089 def _parse_primary_key( 5090 self, wrapped_optional: bool = False, in_props: bool = False 5091 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5092 desc = ( 5093 self._match_set((TokenType.ASC, TokenType.DESC)) 5094 and self._prev.token_type == TokenType.DESC 5095 ) 5096 5097 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5098 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5099 5100 expressions = self._parse_wrapped_csv( 5101 self._parse_primary_key_part, optional=wrapped_optional 5102 ) 5103 options = self._parse_key_constraint_options() 5104 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5105 5106 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5107 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 5108 5109 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5110 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5111 return this 5112 5113 bracket_kind = self._prev.token_type 5114 expressions = self._parse_csv( 5115 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5116 ) 5117 5118 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5119 self.raise_error("Expected ]") 5120 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5121 self.raise_error("Expected }") 5122 5123 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5124 if bracket_kind == TokenType.L_BRACE: 5125 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5126 elif not this or this.name.upper() == "ARRAY": 5127 this = self.expression(exp.Array, expressions=expressions) 5128 else: 5129 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5130 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5131 5132 self._add_comments(this) 5133 return self._parse_bracket(this) 5134 5135 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5136 if self._match(TokenType.COLON): 5137 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 5138 return this 5139 5140 def _parse_case(self) -> t.Optional[exp.Expression]: 5141 ifs = [] 5142 default = None 5143 5144 comments = self._prev_comments 5145 expression = self._parse_conjunction() 5146 5147 while self._match(TokenType.WHEN): 5148 this = self._parse_conjunction() 5149 self._match(TokenType.THEN) 5150 then = self._parse_conjunction() 5151 ifs.append(self.expression(exp.If, this=this, true=then)) 5152 5153 if self._match(TokenType.ELSE): 5154 default = self._parse_conjunction() 5155 5156 if not self._match(TokenType.END): 5157 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5158 default = exp.column("interval") 5159 else: 5160 self.raise_error("Expected END after CASE", self._prev) 5161 5162 return self.expression( 5163 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5164 ) 5165 5166 def _parse_if(self) -> t.Optional[exp.Expression]: 5167 if self._match(TokenType.L_PAREN): 5168 args = self._parse_csv(self._parse_conjunction) 5169 this = self.validate_expression(exp.If.from_arg_list(args), args) 5170 self._match_r_paren() 5171 else: 5172 index = self._index - 1 5173 5174 if self.NO_PAREN_IF_COMMANDS and index == 0: 5175 return self._parse_as_command(self._prev) 5176 5177 condition = self._parse_conjunction() 5178 5179 if not condition: 5180 self._retreat(index) 5181 return None 5182 5183 self._match(TokenType.THEN) 5184 true = self._parse_conjunction() 5185 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 5186 self._match(TokenType.END) 5187 this = self.expression(exp.If, this=condition, true=true, false=false) 5188 5189 return this 5190 5191 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5192 if not self._match_text_seq("VALUE", "FOR"): 5193 self._retreat(self._index - 1) 5194 return None 5195 5196 return self.expression( 5197 exp.NextValueFor, 5198 this=self._parse_column(), 5199 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5200 ) 5201 5202 def _parse_extract(self) -> exp.Extract: 5203 this = self._parse_function() or self._parse_var() or self._parse_type() 5204 5205 if self._match(TokenType.FROM): 5206 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5207 5208 if not self._match(TokenType.COMMA): 5209 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5210 5211 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5212 5213 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5214 this = self._parse_conjunction() 5215 5216 if not self._match(TokenType.ALIAS): 5217 if self._match(TokenType.COMMA): 5218 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5219 5220 self.raise_error("Expected AS after CAST") 5221 5222 fmt = None 5223 to = self._parse_types() 5224 5225 if self._match(TokenType.FORMAT): 5226 fmt_string = self._parse_string() 5227 fmt = self._parse_at_time_zone(fmt_string) 5228 5229 if not to: 5230 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5231 if to.this in exp.DataType.TEMPORAL_TYPES: 5232 this = self.expression( 5233 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5234 this=this, 5235 format=exp.Literal.string( 5236 format_time( 5237 fmt_string.this if fmt_string else "", 5238 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5239 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5240 ) 5241 ), 5242 ) 5243 5244 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5245 this.set("zone", fmt.args["zone"]) 5246 return this 5247 elif not to: 5248 self.raise_error("Expected TYPE after CAST") 5249 elif isinstance(to, exp.Identifier): 5250 to = exp.DataType.build(to.name, udt=True) 5251 elif to.this == exp.DataType.Type.CHAR: 5252 if self._match(TokenType.CHARACTER_SET): 5253 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5254 5255 return self.expression( 5256 exp.Cast if strict else exp.TryCast, 5257 this=this, 5258 to=to, 5259 format=fmt, 5260 safe=safe, 5261 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5262 ) 5263 5264 def _parse_string_agg(self) -> exp.Expression: 5265 if self._match(TokenType.DISTINCT): 5266 args: t.List[t.Optional[exp.Expression]] = [ 5267 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 5268 ] 5269 if self._match(TokenType.COMMA): 5270 args.extend(self._parse_csv(self._parse_conjunction)) 5271 else: 5272 args = self._parse_csv(self._parse_conjunction) # type: ignore 5273 5274 index = self._index 5275 if not self._match(TokenType.R_PAREN) and args: 5276 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5277 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5278 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5279 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5280 5281 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5282 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5283 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5284 if not self._match_text_seq("WITHIN", "GROUP"): 5285 self._retreat(index) 5286 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5287 5288 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5289 order = self._parse_order(this=seq_get(args, 0)) 5290 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5291 5292 def _parse_convert( 5293 self, strict: bool, safe: t.Optional[bool] = None 5294 ) -> t.Optional[exp.Expression]: 5295 this = self._parse_bitwise() 5296 5297 if self._match(TokenType.USING): 5298 to: t.Optional[exp.Expression] = self.expression( 5299 exp.CharacterSet, this=self._parse_var() 5300 ) 5301 elif self._match(TokenType.COMMA): 5302 to = self._parse_types() 5303 else: 5304 to = None 5305 5306 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5307 5308 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5309 """ 5310 There are generally two variants of the DECODE function: 5311 5312 - DECODE(bin, charset) 5313 - DECODE(expression, search, result [, search, result] ... [, default]) 5314 5315 The second variant will always be parsed into a CASE expression. Note that NULL 5316 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5317 instead of relying on pattern matching. 5318 """ 5319 args = self._parse_csv(self._parse_conjunction) 5320 5321 if len(args) < 3: 5322 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5323 5324 expression, *expressions = args 5325 if not expression: 5326 return None 5327 5328 ifs = [] 5329 for search, result in zip(expressions[::2], expressions[1::2]): 5330 if not search or not result: 5331 return None 5332 5333 if isinstance(search, exp.Literal): 5334 ifs.append( 5335 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5336 ) 5337 elif isinstance(search, exp.Null): 5338 ifs.append( 5339 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5340 ) 5341 else: 5342 cond = exp.or_( 5343 exp.EQ(this=expression.copy(), expression=search), 5344 exp.and_( 5345 exp.Is(this=expression.copy(), expression=exp.Null()), 5346 exp.Is(this=search.copy(), expression=exp.Null()), 5347 copy=False, 5348 ), 5349 copy=False, 5350 ) 5351 ifs.append(exp.If(this=cond, true=result)) 5352 5353 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5354 5355 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5356 self._match_text_seq("KEY") 5357 key = self._parse_column() 5358 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5359 self._match_text_seq("VALUE") 5360 value = self._parse_bitwise() 5361 5362 if not key and not value: 5363 return None 5364 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5365 5366 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5367 if not this or not self._match_text_seq("FORMAT", "JSON"): 5368 return this 5369 5370 return self.expression(exp.FormatJson, this=this) 5371 5372 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5373 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5374 for value in values: 5375 if self._match_text_seq(value, "ON", on): 5376 return f"{value} ON {on}" 5377 5378 return None 5379 5380 @t.overload 5381 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5382 5383 @t.overload 5384 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 5385 5386 def _parse_json_object(self, agg=False): 5387 star = self._parse_star() 5388 expressions = ( 5389 [star] 5390 if star 5391 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5392 ) 5393 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5394 5395 unique_keys = None 5396 if self._match_text_seq("WITH", "UNIQUE"): 5397 unique_keys = True 5398 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5399 unique_keys = False 5400 5401 self._match_text_seq("KEYS") 5402 5403 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5404 self._parse_type() 5405 ) 5406 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5407 5408 return self.expression( 5409 exp.JSONObjectAgg if agg else exp.JSONObject, 5410 expressions=expressions, 5411 null_handling=null_handling, 5412 unique_keys=unique_keys, 5413 return_type=return_type, 5414 encoding=encoding, 5415 ) 5416 5417 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5418 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5419 if not self._match_text_seq("NESTED"): 5420 this = self._parse_id_var() 5421 kind = self._parse_types(allow_identifiers=False) 5422 nested = None 5423 else: 5424 this = None 5425 kind = None 5426 nested = True 5427 5428 path = self._match_text_seq("PATH") and self._parse_string() 5429 nested_schema = nested and self._parse_json_schema() 5430 5431 return self.expression( 5432 exp.JSONColumnDef, 5433 this=this, 5434 kind=kind, 5435 path=path, 5436 nested_schema=nested_schema, 5437 ) 5438 5439 def _parse_json_schema(self) -> exp.JSONSchema: 5440 self._match_text_seq("COLUMNS") 5441 return self.expression( 5442 exp.JSONSchema, 5443 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5444 ) 5445 5446 def _parse_json_table(self) -> exp.JSONTable: 5447 this = self._parse_format_json(self._parse_bitwise()) 5448 path = self._match(TokenType.COMMA) and self._parse_string() 5449 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5450 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5451 schema = self._parse_json_schema() 5452 5453 return exp.JSONTable( 5454 this=this, 5455 schema=schema, 5456 path=path, 5457 error_handling=error_handling, 5458 empty_handling=empty_handling, 5459 ) 5460 5461 def _parse_match_against(self) -> exp.MatchAgainst: 5462 expressions = self._parse_csv(self._parse_column) 5463 5464 self._match_text_seq(")", "AGAINST", "(") 5465 5466 this = self._parse_string() 5467 5468 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5469 modifier = "IN NATURAL LANGUAGE MODE" 5470 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5471 modifier = f"{modifier} WITH QUERY EXPANSION" 5472 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5473 modifier = "IN BOOLEAN MODE" 5474 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5475 modifier = "WITH QUERY EXPANSION" 5476 else: 5477 modifier = None 5478 5479 return self.expression( 5480 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5481 ) 5482 5483 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5484 def _parse_open_json(self) -> exp.OpenJSON: 5485 this = self._parse_bitwise() 5486 path = self._match(TokenType.COMMA) and self._parse_string() 5487 5488 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5489 this = self._parse_field(any_token=True) 5490 kind = self._parse_types() 5491 path = self._parse_string() 5492 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5493 5494 return self.expression( 5495 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5496 ) 5497 5498 expressions = None 5499 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5500 self._match_l_paren() 5501 expressions = self._parse_csv(_parse_open_json_column_def) 5502 5503 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5504 5505 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5506 args = self._parse_csv(self._parse_bitwise) 5507 5508 if self._match(TokenType.IN): 5509 return self.expression( 5510 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5511 ) 5512 5513 if haystack_first: 5514 haystack = seq_get(args, 0) 5515 needle = seq_get(args, 1) 5516 else: 5517 needle = seq_get(args, 0) 5518 haystack = seq_get(args, 1) 5519 5520 return self.expression( 5521 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5522 ) 5523 5524 def _parse_predict(self) -> exp.Predict: 5525 self._match_text_seq("MODEL") 5526 this = self._parse_table() 5527 5528 self._match(TokenType.COMMA) 5529 self._match_text_seq("TABLE") 5530 5531 return self.expression( 5532 exp.Predict, 5533 this=this, 5534 expression=self._parse_table(), 5535 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5536 ) 5537 5538 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5539 args = self._parse_csv(self._parse_table) 5540 return exp.JoinHint(this=func_name.upper(), expressions=args) 5541 5542 def _parse_substring(self) -> exp.Substring: 5543 # Postgres supports the form: substring(string [from int] [for int]) 5544 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5545 5546 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5547 5548 if self._match(TokenType.FROM): 5549 args.append(self._parse_bitwise()) 5550 if self._match(TokenType.FOR): 5551 if len(args) == 1: 5552 args.append(exp.Literal.number(1)) 5553 args.append(self._parse_bitwise()) 5554 5555 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5556 5557 def _parse_trim(self) -> exp.Trim: 5558 # https://www.w3resource.com/sql/character-functions/trim.php 5559 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5560 5561 position = None 5562 collation = None 5563 expression = None 5564 5565 if self._match_texts(self.TRIM_TYPES): 5566 position = self._prev.text.upper() 5567 5568 this = self._parse_bitwise() 5569 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5570 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5571 expression = self._parse_bitwise() 5572 5573 if invert_order: 5574 this, expression = expression, this 5575 5576 if self._match(TokenType.COLLATE): 5577 collation = self._parse_bitwise() 5578 5579 return self.expression( 5580 exp.Trim, this=this, position=position, expression=expression, collation=collation 5581 ) 5582 5583 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5584 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5585 5586 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5587 return self._parse_window(self._parse_id_var(), alias=True) 5588 5589 def _parse_respect_or_ignore_nulls( 5590 self, this: t.Optional[exp.Expression] 5591 ) -> t.Optional[exp.Expression]: 5592 if self._match_text_seq("IGNORE", "NULLS"): 5593 return self.expression(exp.IgnoreNulls, this=this) 5594 if self._match_text_seq("RESPECT", "NULLS"): 5595 return self.expression(exp.RespectNulls, this=this) 5596 return this 5597 5598 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5599 if self._match(TokenType.HAVING): 5600 self._match_texts(("MAX", "MIN")) 5601 max = self._prev.text.upper() != "MIN" 5602 return self.expression( 5603 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5604 ) 5605 5606 return this 5607 5608 def _parse_window( 5609 self, this: t.Optional[exp.Expression], alias: bool = False 5610 ) -> t.Optional[exp.Expression]: 5611 func = this 5612 comments = func.comments if isinstance(func, exp.Expression) else None 5613 5614 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5615 self._match(TokenType.WHERE) 5616 this = self.expression( 5617 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5618 ) 5619 self._match_r_paren() 5620 5621 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5622 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5623 if self._match_text_seq("WITHIN", "GROUP"): 5624 order = self._parse_wrapped(self._parse_order) 5625 this = self.expression(exp.WithinGroup, this=this, expression=order) 5626 5627 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5628 # Some dialects choose to implement and some do not. 5629 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5630 5631 # There is some code above in _parse_lambda that handles 5632 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5633 5634 # The below changes handle 5635 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5636 5637 # Oracle allows both formats 5638 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5639 # and Snowflake chose to do the same for familiarity 5640 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5641 if isinstance(this, exp.AggFunc): 5642 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5643 5644 if ignore_respect and ignore_respect is not this: 5645 ignore_respect.replace(ignore_respect.this) 5646 this = self.expression(ignore_respect.__class__, this=this) 5647 5648 this = self._parse_respect_or_ignore_nulls(this) 5649 5650 # bigquery select from window x AS (partition by ...) 5651 if alias: 5652 over = None 5653 self._match(TokenType.ALIAS) 5654 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5655 return this 5656 else: 5657 over = self._prev.text.upper() 5658 5659 if comments and isinstance(func, exp.Expression): 5660 func.pop_comments() 5661 5662 if not self._match(TokenType.L_PAREN): 5663 return self.expression( 5664 exp.Window, 5665 comments=comments, 5666 this=this, 5667 alias=self._parse_id_var(False), 5668 over=over, 5669 ) 5670 5671 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5672 5673 first = self._match(TokenType.FIRST) 5674 if self._match_text_seq("LAST"): 5675 first = False 5676 5677 partition, order = self._parse_partition_and_order() 5678 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5679 5680 if kind: 5681 self._match(TokenType.BETWEEN) 5682 start = self._parse_window_spec() 5683 self._match(TokenType.AND) 5684 end = self._parse_window_spec() 5685 5686 spec = self.expression( 5687 exp.WindowSpec, 5688 kind=kind, 5689 start=start["value"], 5690 start_side=start["side"], 5691 end=end["value"], 5692 end_side=end["side"], 5693 ) 5694 else: 5695 spec = None 5696 5697 self._match_r_paren() 5698 5699 window = self.expression( 5700 exp.Window, 5701 comments=comments, 5702 this=this, 5703 partition_by=partition, 5704 order=order, 5705 spec=spec, 5706 alias=window_alias, 5707 over=over, 5708 first=first, 5709 ) 5710 5711 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5712 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5713 return self._parse_window(window, alias=alias) 5714 5715 return window 5716 5717 def _parse_partition_and_order( 5718 self, 5719 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5720 return self._parse_partition_by(), self._parse_order() 5721 5722 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5723 self._match(TokenType.BETWEEN) 5724 5725 return { 5726 "value": ( 5727 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5728 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5729 or self._parse_bitwise() 5730 ), 5731 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5732 } 5733 5734 def _parse_alias( 5735 self, this: t.Optional[exp.Expression], explicit: bool = False 5736 ) -> t.Optional[exp.Expression]: 5737 any_token = self._match(TokenType.ALIAS) 5738 comments = self._prev_comments or [] 5739 5740 if explicit and not any_token: 5741 return this 5742 5743 if self._match(TokenType.L_PAREN): 5744 aliases = self.expression( 5745 exp.Aliases, 5746 comments=comments, 5747 this=this, 5748 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5749 ) 5750 self._match_r_paren(aliases) 5751 return aliases 5752 5753 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5754 self.STRING_ALIASES and self._parse_string_as_identifier() 5755 ) 5756 5757 if alias: 5758 comments.extend(alias.pop_comments()) 5759 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5760 column = this.this 5761 5762 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5763 if not this.comments and column and column.comments: 5764 this.comments = column.pop_comments() 5765 5766 return this 5767 5768 def _parse_id_var( 5769 self, 5770 any_token: bool = True, 5771 tokens: t.Optional[t.Collection[TokenType]] = None, 5772 ) -> t.Optional[exp.Expression]: 5773 expression = self._parse_identifier() 5774 if not expression and ( 5775 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5776 ): 5777 quoted = self._prev.token_type == TokenType.STRING 5778 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5779 5780 return expression 5781 5782 def _parse_string(self) -> t.Optional[exp.Expression]: 5783 if self._match_set(self.STRING_PARSERS): 5784 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5785 return self._parse_placeholder() 5786 5787 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5788 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5789 5790 def _parse_number(self) -> t.Optional[exp.Expression]: 5791 if self._match_set(self.NUMERIC_PARSERS): 5792 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5793 return self._parse_placeholder() 5794 5795 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5796 if self._match(TokenType.IDENTIFIER): 5797 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5798 return self._parse_placeholder() 5799 5800 def _parse_var( 5801 self, 5802 any_token: bool = False, 5803 tokens: t.Optional[t.Collection[TokenType]] = None, 5804 upper: bool = False, 5805 ) -> t.Optional[exp.Expression]: 5806 if ( 5807 (any_token and self._advance_any()) 5808 or self._match(TokenType.VAR) 5809 or (self._match_set(tokens) if tokens else False) 5810 ): 5811 return self.expression( 5812 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5813 ) 5814 return self._parse_placeholder() 5815 5816 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5817 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5818 self._advance() 5819 return self._prev 5820 return None 5821 5822 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5823 return self._parse_var() or self._parse_string() 5824 5825 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5826 return self._parse_primary() or self._parse_var(any_token=True) 5827 5828 def _parse_null(self) -> t.Optional[exp.Expression]: 5829 if self._match_set(self.NULL_TOKENS): 5830 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5831 return self._parse_placeholder() 5832 5833 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5834 if self._match(TokenType.TRUE): 5835 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5836 if self._match(TokenType.FALSE): 5837 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5838 return self._parse_placeholder() 5839 5840 def _parse_star(self) -> t.Optional[exp.Expression]: 5841 if self._match(TokenType.STAR): 5842 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5843 return self._parse_placeholder() 5844 5845 def _parse_parameter(self) -> exp.Parameter: 5846 this = self._parse_identifier() or self._parse_primary_or_var() 5847 return self.expression(exp.Parameter, this=this) 5848 5849 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5850 if self._match_set(self.PLACEHOLDER_PARSERS): 5851 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5852 if placeholder: 5853 return placeholder 5854 self._advance(-1) 5855 return None 5856 5857 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 5858 if not self._match_texts(keywords): 5859 return None 5860 if self._match(TokenType.L_PAREN, advance=False): 5861 return self._parse_wrapped_csv(self._parse_expression) 5862 5863 expression = self._parse_expression() 5864 return [expression] if expression else None 5865 5866 def _parse_csv( 5867 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5868 ) -> t.List[exp.Expression]: 5869 parse_result = parse_method() 5870 items = [parse_result] if parse_result is not None else [] 5871 5872 while self._match(sep): 5873 self._add_comments(parse_result) 5874 parse_result = parse_method() 5875 if parse_result is not None: 5876 items.append(parse_result) 5877 5878 return items 5879 5880 def _parse_tokens( 5881 self, parse_method: t.Callable, expressions: t.Dict 5882 ) -> t.Optional[exp.Expression]: 5883 this = parse_method() 5884 5885 while self._match_set(expressions): 5886 this = self.expression( 5887 expressions[self._prev.token_type], 5888 this=this, 5889 comments=self._prev_comments, 5890 expression=parse_method(), 5891 ) 5892 5893 return this 5894 5895 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5896 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5897 5898 def _parse_wrapped_csv( 5899 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5900 ) -> t.List[exp.Expression]: 5901 return self._parse_wrapped( 5902 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5903 ) 5904 5905 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5906 wrapped = self._match(TokenType.L_PAREN) 5907 if not wrapped and not optional: 5908 self.raise_error("Expecting (") 5909 parse_result = parse_method() 5910 if wrapped: 5911 self._match_r_paren() 5912 return parse_result 5913 5914 def _parse_expressions(self) -> t.List[exp.Expression]: 5915 return self._parse_csv(self._parse_expression) 5916 5917 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5918 return self._parse_select() or self._parse_set_operations( 5919 self._parse_expression() if alias else self._parse_conjunction() 5920 ) 5921 5922 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5923 return self._parse_query_modifiers( 5924 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5925 ) 5926 5927 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5928 this = None 5929 if self._match_texts(self.TRANSACTION_KIND): 5930 this = self._prev.text 5931 5932 self._match_texts(("TRANSACTION", "WORK")) 5933 5934 modes = [] 5935 while True: 5936 mode = [] 5937 while self._match(TokenType.VAR): 5938 mode.append(self._prev.text) 5939 5940 if mode: 5941 modes.append(" ".join(mode)) 5942 if not self._match(TokenType.COMMA): 5943 break 5944 5945 return self.expression(exp.Transaction, this=this, modes=modes) 5946 5947 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5948 chain = None 5949 savepoint = None 5950 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5951 5952 self._match_texts(("TRANSACTION", "WORK")) 5953 5954 if self._match_text_seq("TO"): 5955 self._match_text_seq("SAVEPOINT") 5956 savepoint = self._parse_id_var() 5957 5958 if self._match(TokenType.AND): 5959 chain = not self._match_text_seq("NO") 5960 self._match_text_seq("CHAIN") 5961 5962 if is_rollback: 5963 return self.expression(exp.Rollback, savepoint=savepoint) 5964 5965 return self.expression(exp.Commit, chain=chain) 5966 5967 def _parse_refresh(self) -> exp.Refresh: 5968 self._match(TokenType.TABLE) 5969 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5970 5971 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5972 if not self._match_text_seq("ADD"): 5973 return None 5974 5975 self._match(TokenType.COLUMN) 5976 exists_column = self._parse_exists(not_=True) 5977 expression = self._parse_field_def() 5978 5979 if expression: 5980 expression.set("exists", exists_column) 5981 5982 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5983 if self._match_texts(("FIRST", "AFTER")): 5984 position = self._prev.text 5985 column_position = self.expression( 5986 exp.ColumnPosition, this=self._parse_column(), position=position 5987 ) 5988 expression.set("position", column_position) 5989 5990 return expression 5991 5992 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5993 drop = self._match(TokenType.DROP) and self._parse_drop() 5994 if drop and not isinstance(drop, exp.Command): 5995 drop.set("kind", drop.args.get("kind", "COLUMN")) 5996 return drop 5997 5998 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5999 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6000 return self.expression( 6001 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6002 ) 6003 6004 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6005 index = self._index - 1 6006 6007 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6008 return self._parse_csv( 6009 lambda: self.expression( 6010 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6011 ) 6012 ) 6013 6014 self._retreat(index) 6015 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6016 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6017 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6018 6019 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6020 if self._match_texts(self.ALTER_ALTER_PARSERS): 6021 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6022 6023 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6024 # keyword after ALTER we default to parsing this statement 6025 self._match(TokenType.COLUMN) 6026 column = self._parse_field(any_token=True) 6027 6028 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6029 return self.expression(exp.AlterColumn, this=column, drop=True) 6030 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6031 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 6032 if self._match(TokenType.COMMENT): 6033 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6034 6035 self._match_text_seq("SET", "DATA") 6036 self._match_text_seq("TYPE") 6037 return self.expression( 6038 exp.AlterColumn, 6039 this=column, 6040 dtype=self._parse_types(), 6041 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6042 using=self._match(TokenType.USING) and self._parse_conjunction(), 6043 ) 6044 6045 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6046 if self._match_texts(("ALL", "EVEN", "AUTO")): 6047 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6048 6049 self._match_text_seq("KEY", "DISTKEY") 6050 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6051 6052 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6053 if compound: 6054 self._match_text_seq("SORTKEY") 6055 6056 if self._match(TokenType.L_PAREN, advance=False): 6057 return self.expression( 6058 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6059 ) 6060 6061 self._match_texts(("AUTO", "NONE")) 6062 return self.expression( 6063 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6064 ) 6065 6066 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6067 index = self._index - 1 6068 6069 partition_exists = self._parse_exists() 6070 if self._match(TokenType.PARTITION, advance=False): 6071 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6072 6073 self._retreat(index) 6074 return self._parse_csv(self._parse_drop_column) 6075 6076 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6077 if self._match(TokenType.COLUMN): 6078 exists = self._parse_exists() 6079 old_column = self._parse_column() 6080 to = self._match_text_seq("TO") 6081 new_column = self._parse_column() 6082 6083 if old_column is None or to is None or new_column is None: 6084 return None 6085 6086 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6087 6088 self._match_text_seq("TO") 6089 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6090 6091 def _parse_alter_table_set(self) -> exp.AlterSet: 6092 alter_set = self.expression(exp.AlterSet) 6093 6094 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6095 "TABLE", "PROPERTIES" 6096 ): 6097 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_conjunction)) 6098 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6099 alter_set.set("expressions", [self._parse_conjunction()]) 6100 elif self._match_texts(("LOGGED", "UNLOGGED")): 6101 alter_set.set("option", exp.var(self._prev.text.upper())) 6102 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6103 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6104 elif self._match_text_seq("LOCATION"): 6105 alter_set.set("location", self._parse_field()) 6106 elif self._match_text_seq("ACCESS", "METHOD"): 6107 alter_set.set("access_method", self._parse_field()) 6108 elif self._match_text_seq("TABLESPACE"): 6109 alter_set.set("tablespace", self._parse_field()) 6110 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6111 alter_set.set("file_format", [self._parse_field()]) 6112 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6113 alter_set.set("file_format", self._parse_wrapped_options()) 6114 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6115 alter_set.set("copy_options", self._parse_wrapped_options()) 6116 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6117 alter_set.set("tag", self._parse_csv(self._parse_conjunction)) 6118 else: 6119 if self._match_text_seq("SERDE"): 6120 alter_set.set("serde", self._parse_field()) 6121 6122 alter_set.set("expressions", [self._parse_properties()]) 6123 6124 return alter_set 6125 6126 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6127 start = self._prev 6128 6129 if not self._match(TokenType.TABLE): 6130 return self._parse_as_command(start) 6131 6132 exists = self._parse_exists() 6133 only = self._match_text_seq("ONLY") 6134 this = self._parse_table(schema=True) 6135 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6136 6137 if self._next: 6138 self._advance() 6139 6140 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6141 if parser: 6142 actions = ensure_list(parser(self)) 6143 options = self._parse_csv(self._parse_property) 6144 6145 if not self._curr and actions: 6146 return self.expression( 6147 exp.AlterTable, 6148 this=this, 6149 exists=exists, 6150 actions=actions, 6151 only=only, 6152 options=options, 6153 cluster=cluster, 6154 ) 6155 6156 return self._parse_as_command(start) 6157 6158 def _parse_merge(self) -> exp.Merge: 6159 self._match(TokenType.INTO) 6160 target = self._parse_table() 6161 6162 if target and self._match(TokenType.ALIAS, advance=False): 6163 target.set("alias", self._parse_table_alias()) 6164 6165 self._match(TokenType.USING) 6166 using = self._parse_table() 6167 6168 self._match(TokenType.ON) 6169 on = self._parse_conjunction() 6170 6171 return self.expression( 6172 exp.Merge, 6173 this=target, 6174 using=using, 6175 on=on, 6176 expressions=self._parse_when_matched(), 6177 ) 6178 6179 def _parse_when_matched(self) -> t.List[exp.When]: 6180 whens = [] 6181 6182 while self._match(TokenType.WHEN): 6183 matched = not self._match(TokenType.NOT) 6184 self._match_text_seq("MATCHED") 6185 source = ( 6186 False 6187 if self._match_text_seq("BY", "TARGET") 6188 else self._match_text_seq("BY", "SOURCE") 6189 ) 6190 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 6191 6192 self._match(TokenType.THEN) 6193 6194 if self._match(TokenType.INSERT): 6195 _this = self._parse_star() 6196 if _this: 6197 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6198 else: 6199 then = self.expression( 6200 exp.Insert, 6201 this=self._parse_value(), 6202 expression=self._match_text_seq("VALUES") and self._parse_value(), 6203 ) 6204 elif self._match(TokenType.UPDATE): 6205 expressions = self._parse_star() 6206 if expressions: 6207 then = self.expression(exp.Update, expressions=expressions) 6208 else: 6209 then = self.expression( 6210 exp.Update, 6211 expressions=self._match(TokenType.SET) 6212 and self._parse_csv(self._parse_equality), 6213 ) 6214 elif self._match(TokenType.DELETE): 6215 then = self.expression(exp.Var, this=self._prev.text) 6216 else: 6217 then = None 6218 6219 whens.append( 6220 self.expression( 6221 exp.When, 6222 matched=matched, 6223 source=source, 6224 condition=condition, 6225 then=then, 6226 ) 6227 ) 6228 return whens 6229 6230 def _parse_show(self) -> t.Optional[exp.Expression]: 6231 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6232 if parser: 6233 return parser(self) 6234 return self._parse_as_command(self._prev) 6235 6236 def _parse_set_item_assignment( 6237 self, kind: t.Optional[str] = None 6238 ) -> t.Optional[exp.Expression]: 6239 index = self._index 6240 6241 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6242 return self._parse_set_transaction(global_=kind == "GLOBAL") 6243 6244 left = self._parse_primary() or self._parse_column() 6245 assignment_delimiter = self._match_texts(("=", "TO")) 6246 6247 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6248 self._retreat(index) 6249 return None 6250 6251 right = self._parse_statement() or self._parse_id_var() 6252 if isinstance(right, (exp.Column, exp.Identifier)): 6253 right = exp.var(right.name) 6254 6255 this = self.expression(exp.EQ, this=left, expression=right) 6256 return self.expression(exp.SetItem, this=this, kind=kind) 6257 6258 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6259 self._match_text_seq("TRANSACTION") 6260 characteristics = self._parse_csv( 6261 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6262 ) 6263 return self.expression( 6264 exp.SetItem, 6265 expressions=characteristics, 6266 kind="TRANSACTION", 6267 **{"global": global_}, # type: ignore 6268 ) 6269 6270 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6271 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6272 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6273 6274 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6275 index = self._index 6276 set_ = self.expression( 6277 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6278 ) 6279 6280 if self._curr: 6281 self._retreat(index) 6282 return self._parse_as_command(self._prev) 6283 6284 return set_ 6285 6286 def _parse_var_from_options( 6287 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6288 ) -> t.Optional[exp.Var]: 6289 start = self._curr 6290 if not start: 6291 return None 6292 6293 option = start.text.upper() 6294 continuations = options.get(option) 6295 6296 index = self._index 6297 self._advance() 6298 for keywords in continuations or []: 6299 if isinstance(keywords, str): 6300 keywords = (keywords,) 6301 6302 if self._match_text_seq(*keywords): 6303 option = f"{option} {' '.join(keywords)}" 6304 break 6305 else: 6306 if continuations or continuations is None: 6307 if raise_unmatched: 6308 self.raise_error(f"Unknown option {option}") 6309 6310 self._retreat(index) 6311 return None 6312 6313 return exp.var(option) 6314 6315 def _parse_as_command(self, start: Token) -> exp.Command: 6316 while self._curr: 6317 self._advance() 6318 text = self._find_sql(start, self._prev) 6319 size = len(start.text) 6320 self._warn_unsupported() 6321 return exp.Command(this=text[:size], expression=text[size:]) 6322 6323 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6324 settings = [] 6325 6326 self._match_l_paren() 6327 kind = self._parse_id_var() 6328 6329 if self._match(TokenType.L_PAREN): 6330 while True: 6331 key = self._parse_id_var() 6332 value = self._parse_primary() 6333 6334 if not key and value is None: 6335 break 6336 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6337 self._match(TokenType.R_PAREN) 6338 6339 self._match_r_paren() 6340 6341 return self.expression( 6342 exp.DictProperty, 6343 this=this, 6344 kind=kind.this if kind else None, 6345 settings=settings, 6346 ) 6347 6348 def _parse_dict_range(self, this: str) -> exp.DictRange: 6349 self._match_l_paren() 6350 has_min = self._match_text_seq("MIN") 6351 if has_min: 6352 min = self._parse_var() or self._parse_primary() 6353 self._match_text_seq("MAX") 6354 max = self._parse_var() or self._parse_primary() 6355 else: 6356 max = self._parse_var() or self._parse_primary() 6357 min = exp.Literal.number(0) 6358 self._match_r_paren() 6359 return self.expression(exp.DictRange, this=this, min=min, max=max) 6360 6361 def _parse_comprehension( 6362 self, this: t.Optional[exp.Expression] 6363 ) -> t.Optional[exp.Comprehension]: 6364 index = self._index 6365 expression = self._parse_column() 6366 if not self._match(TokenType.IN): 6367 self._retreat(index - 1) 6368 return None 6369 iterator = self._parse_column() 6370 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6371 return self.expression( 6372 exp.Comprehension, 6373 this=this, 6374 expression=expression, 6375 iterator=iterator, 6376 condition=condition, 6377 ) 6378 6379 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6380 if self._match(TokenType.HEREDOC_STRING): 6381 return self.expression(exp.Heredoc, this=self._prev.text) 6382 6383 if not self._match_text_seq("$"): 6384 return None 6385 6386 tags = ["$"] 6387 tag_text = None 6388 6389 if self._is_connected(): 6390 self._advance() 6391 tags.append(self._prev.text.upper()) 6392 else: 6393 self.raise_error("No closing $ found") 6394 6395 if tags[-1] != "$": 6396 if self._is_connected() and self._match_text_seq("$"): 6397 tag_text = tags[-1] 6398 tags.append("$") 6399 else: 6400 self.raise_error("No closing $ found") 6401 6402 heredoc_start = self._curr 6403 6404 while self._curr: 6405 if self._match_text_seq(*tags, advance=False): 6406 this = self._find_sql(heredoc_start, self._prev) 6407 self._advance(len(tags)) 6408 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6409 6410 self._advance() 6411 6412 self.raise_error(f"No closing {''.join(tags)} found") 6413 return None 6414 6415 def _find_parser( 6416 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6417 ) -> t.Optional[t.Callable]: 6418 if not self._curr: 6419 return None 6420 6421 index = self._index 6422 this = [] 6423 while True: 6424 # The current token might be multiple words 6425 curr = self._curr.text.upper() 6426 key = curr.split(" ") 6427 this.append(curr) 6428 6429 self._advance() 6430 result, trie = in_trie(trie, key) 6431 if result == TrieResult.FAILED: 6432 break 6433 6434 if result == TrieResult.EXISTS: 6435 subparser = parsers[" ".join(this)] 6436 return subparser 6437 6438 self._retreat(index) 6439 return None 6440 6441 def _match(self, token_type, advance=True, expression=None): 6442 if not self._curr: 6443 return None 6444 6445 if self._curr.token_type == token_type: 6446 if advance: 6447 self._advance() 6448 self._add_comments(expression) 6449 return True 6450 6451 return None 6452 6453 def _match_set(self, types, advance=True): 6454 if not self._curr: 6455 return None 6456 6457 if self._curr.token_type in types: 6458 if advance: 6459 self._advance() 6460 return True 6461 6462 return None 6463 6464 def _match_pair(self, token_type_a, token_type_b, advance=True): 6465 if not self._curr or not self._next: 6466 return None 6467 6468 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6469 if advance: 6470 self._advance(2) 6471 return True 6472 6473 return None 6474 6475 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6476 if not self._match(TokenType.L_PAREN, expression=expression): 6477 self.raise_error("Expecting (") 6478 6479 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6480 if not self._match(TokenType.R_PAREN, expression=expression): 6481 self.raise_error("Expecting )") 6482 6483 def _match_texts(self, texts, advance=True): 6484 if self._curr and self._curr.text.upper() in texts: 6485 if advance: 6486 self._advance() 6487 return True 6488 return None 6489 6490 def _match_text_seq(self, *texts, advance=True): 6491 index = self._index 6492 for text in texts: 6493 if self._curr and self._curr.text.upper() == text: 6494 self._advance() 6495 else: 6496 self._retreat(index) 6497 return None 6498 6499 if not advance: 6500 self._retreat(index) 6501 6502 return True 6503 6504 def _replace_lambda( 6505 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6506 ) -> t.Optional[exp.Expression]: 6507 if not node: 6508 return node 6509 6510 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6511 6512 for column in node.find_all(exp.Column): 6513 typ = lambda_types.get(column.parts[0].name) 6514 if typ is not None: 6515 dot_or_id = column.to_dot() if column.table else column.this 6516 6517 if typ: 6518 dot_or_id = self.expression( 6519 exp.Cast, 6520 this=dot_or_id, 6521 to=typ, 6522 ) 6523 6524 parent = column.parent 6525 6526 while isinstance(parent, exp.Dot): 6527 if not isinstance(parent.parent, exp.Dot): 6528 parent.replace(dot_or_id) 6529 break 6530 parent = parent.parent 6531 else: 6532 if column is node: 6533 node = dot_or_id 6534 else: 6535 column.replace(dot_or_id) 6536 return node 6537 6538 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6539 start = self._prev 6540 6541 # Not to be confused with TRUNCATE(number, decimals) function call 6542 if self._match(TokenType.L_PAREN): 6543 self._retreat(self._index - 2) 6544 return self._parse_function() 6545 6546 # Clickhouse supports TRUNCATE DATABASE as well 6547 is_database = self._match(TokenType.DATABASE) 6548 6549 self._match(TokenType.TABLE) 6550 6551 exists = self._parse_exists(not_=False) 6552 6553 expressions = self._parse_csv( 6554 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6555 ) 6556 6557 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6558 6559 if self._match_text_seq("RESTART", "IDENTITY"): 6560 identity = "RESTART" 6561 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6562 identity = "CONTINUE" 6563 else: 6564 identity = None 6565 6566 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6567 option = self._prev.text 6568 else: 6569 option = None 6570 6571 partition = self._parse_partition() 6572 6573 # Fallback case 6574 if self._curr: 6575 return self._parse_as_command(start) 6576 6577 return self.expression( 6578 exp.TruncateTable, 6579 expressions=expressions, 6580 is_database=is_database, 6581 exists=exists, 6582 cluster=cluster, 6583 identity=identity, 6584 option=option, 6585 partition=partition, 6586 ) 6587 6588 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6589 this = self._parse_ordered(self._parse_opclass) 6590 6591 if not self._match(TokenType.WITH): 6592 return this 6593 6594 op = self._parse_var(any_token=True) 6595 6596 return self.expression(exp.WithOperator, this=this, op=op) 6597 6598 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6599 opts = [] 6600 self._match(TokenType.EQ) 6601 self._match(TokenType.L_PAREN) 6602 while self._curr and not self._match(TokenType.R_PAREN): 6603 opts.append(self._parse_conjunction()) 6604 self._match(TokenType.COMMA) 6605 return opts 6606 6607 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6608 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 6609 6610 options = [] 6611 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6612 option = self._parse_unquoted_field() 6613 value = None 6614 6615 # Some options are defined as functions with the values as params 6616 if not isinstance(option, exp.Func): 6617 prev = self._prev.text.upper() 6618 # Different dialects might separate options and values by white space, "=" and "AS" 6619 self._match(TokenType.EQ) 6620 self._match(TokenType.ALIAS) 6621 6622 if prev == "FILE_FORMAT" and self._match(TokenType.L_PAREN): 6623 # Snowflake FILE_FORMAT case 6624 value = self._parse_wrapped_options() 6625 else: 6626 value = self._parse_unquoted_field() 6627 6628 param = self.expression(exp.CopyParameter, this=option, expression=value) 6629 options.append(param) 6630 6631 if sep: 6632 self._match(sep) 6633 6634 return options 6635 6636 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6637 expr = self.expression(exp.Credentials) 6638 6639 if self._match_text_seq("STORAGE_INTEGRATION", advance=False): 6640 expr.set("storage", self._parse_conjunction()) 6641 if self._match_text_seq("CREDENTIALS"): 6642 # Snowflake supports CREDENTIALS = (...), while Redshift CREDENTIALS <string> 6643 creds = ( 6644 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6645 ) 6646 expr.set("credentials", creds) 6647 if self._match_text_seq("ENCRYPTION"): 6648 expr.set("encryption", self._parse_wrapped_options()) 6649 if self._match_text_seq("IAM_ROLE"): 6650 expr.set("iam_role", self._parse_field()) 6651 if self._match_text_seq("REGION"): 6652 expr.set("region", self._parse_field()) 6653 6654 return expr 6655 6656 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6657 return self._parse_field() 6658 6659 def _parse_copy(self) -> exp.Copy | exp.Command: 6660 start = self._prev 6661 6662 self._match(TokenType.INTO) 6663 6664 this = ( 6665 self._parse_conjunction() 6666 if self._match(TokenType.L_PAREN, advance=False) 6667 else self._parse_table(schema=True) 6668 ) 6669 6670 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6671 6672 files = self._parse_csv(self._parse_file_location) 6673 credentials = self._parse_credentials() 6674 6675 self._match_text_seq("WITH") 6676 6677 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6678 6679 # Fallback case 6680 if self._curr: 6681 return self._parse_as_command(start) 6682 6683 return self.expression( 6684 exp.Copy, 6685 this=this, 6686 kind=kind, 6687 credentials=credentials, 6688 files=files, 6689 params=params, 6690 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1208 def __init__( 1209 self, 1210 error_level: t.Optional[ErrorLevel] = None, 1211 error_message_context: int = 100, 1212 max_errors: int = 3, 1213 dialect: DialectType = None, 1214 ): 1215 from sqlglot.dialects import Dialect 1216 1217 self.error_level = error_level or ErrorLevel.IMMEDIATE 1218 self.error_message_context = error_message_context 1219 self.max_errors = max_errors 1220 self.dialect = Dialect.get_or_raise(dialect) 1221 self.reset()
1233 def parse( 1234 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1235 ) -> t.List[t.Optional[exp.Expression]]: 1236 """ 1237 Parses a list of tokens and returns a list of syntax trees, one tree 1238 per parsed SQL statement. 1239 1240 Args: 1241 raw_tokens: The list of tokens. 1242 sql: The original SQL string, used to produce helpful debug messages. 1243 1244 Returns: 1245 The list of the produced syntax trees. 1246 """ 1247 return self._parse( 1248 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1249 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1251 def parse_into( 1252 self, 1253 expression_types: exp.IntoType, 1254 raw_tokens: t.List[Token], 1255 sql: t.Optional[str] = None, 1256 ) -> t.List[t.Optional[exp.Expression]]: 1257 """ 1258 Parses a list of tokens into a given Expression type. If a collection of Expression 1259 types is given instead, this method will try to parse the token list into each one 1260 of them, stopping at the first for which the parsing succeeds. 1261 1262 Args: 1263 expression_types: The expression type(s) to try and parse the token list into. 1264 raw_tokens: The list of tokens. 1265 sql: The original SQL string, used to produce helpful debug messages. 1266 1267 Returns: 1268 The target Expression. 1269 """ 1270 errors = [] 1271 for expression_type in ensure_list(expression_types): 1272 parser = self.EXPRESSION_PARSERS.get(expression_type) 1273 if not parser: 1274 raise TypeError(f"No parser registered for {expression_type}") 1275 1276 try: 1277 return self._parse(parser, raw_tokens, sql) 1278 except ParseError as e: 1279 e.errors[0]["into_expression"] = expression_type 1280 errors.append(e) 1281 1282 raise ParseError( 1283 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1284 errors=merge_errors(errors), 1285 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1325 def check_errors(self) -> None: 1326 """Logs or raises any found errors, depending on the chosen error level setting.""" 1327 if self.error_level == ErrorLevel.WARN: 1328 for error in self.errors: 1329 logger.error(str(error)) 1330 elif self.error_level == ErrorLevel.RAISE and self.errors: 1331 raise ParseError( 1332 concat_messages(self.errors, self.max_errors), 1333 errors=merge_errors(self.errors), 1334 )
Logs or raises any found errors, depending on the chosen error level setting.
1336 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1337 """ 1338 Appends an error in the list of recorded errors or raises it, depending on the chosen 1339 error level setting. 1340 """ 1341 token = token or self._curr or self._prev or Token.string("") 1342 start = token.start 1343 end = token.end + 1 1344 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1345 highlight = self.sql[start:end] 1346 end_context = self.sql[end : end + self.error_message_context] 1347 1348 error = ParseError.new( 1349 f"{message}. Line {token.line}, Col: {token.col}.\n" 1350 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1351 description=message, 1352 line=token.line, 1353 col=token.col, 1354 start_context=start_context, 1355 highlight=highlight, 1356 end_context=end_context, 1357 ) 1358 1359 if self.error_level == ErrorLevel.IMMEDIATE: 1360 raise error 1361 1362 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1364 def expression( 1365 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1366 ) -> E: 1367 """ 1368 Creates a new, validated Expression. 1369 1370 Args: 1371 exp_class: The expression class to instantiate. 1372 comments: An optional list of comments to attach to the expression. 1373 kwargs: The arguments to set for the expression along with their respective values. 1374 1375 Returns: 1376 The target expression. 1377 """ 1378 instance = exp_class(**kwargs) 1379 instance.add_comments(comments) if comments else self._add_comments(instance) 1380 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1387 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1388 """ 1389 Validates an Expression, making sure that all its mandatory arguments are set. 1390 1391 Args: 1392 expression: The expression to validate. 1393 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1394 1395 Returns: 1396 The validated expression. 1397 """ 1398 if self.error_level != ErrorLevel.IGNORE: 1399 for error_message in expression.error_messages(args): 1400 self.raise_error(error_message) 1401 1402 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.