sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
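
# Illustrative sketch, not part of the module source: these builder functions
# normalize raw argument lists into expression trees. For instance, build_mod
# parenthesizes binary operands, as its comment above shows (assuming the
# default dialect):
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("SELECT MOD(a + 1, 7)").sql()
#     'SELECT (a + 1) % 7'
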
class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "MOD": build_mod,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }
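
    # Illustrative sketch, not part of the source: NO_PAREN_FUNCTIONS lets these
    # keywords parse as function expressions without parentheses, e.g.:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT CURRENT_DATE").expressions[0]
    #     CurrentDate()
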
    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.MODEL,
        TokenType.DICTIONARY,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.FOREIGN_KEY,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
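
    # Illustrative sketch, not part of the source: because keywords such as
    # PERCENT or OFFSET are in ID_VAR_TOKENS, they can still be used as plain
    # identifiers, e.g.:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT a AS percent FROM t").sql()
    #     'SELECT a AS percent FROM t'
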
    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.COLON_EQ: exp.PropertyEQ,
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()
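
    # Illustrative sketch, not part of the source: the EQUALITY, COMPARISON,
    # BITWISE, TERM and FACTOR tables above drive a precedence-climbing parse,
    # so FACTOR operators bind tighter than TERM ones (schematic repr):
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("a + b * c")  # exp.Mul nested under exp.Add
    #     Add(this=Column(...), expression=Mul(...))
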
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
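
    # Illustrative sketch, not part of the source: COLUMN_OPERATORS attaches
    # postfix operators to a parsed column, e.g. `::` builds a cast:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("x::INT").sql()
    #     'CAST(x AS INT)'
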
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }
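
    # Illustrative sketch, not part of the source: PLACEHOLDER_PARSERS covers
    # `?` as well as named `:param` placeholders, e.g. (schematic repr):
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT * FROM t WHERE id = :id").find(sqlglot.exp.Placeholder)
    #     Placeholder(this=id)
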
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_conjunction),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }
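
    # Illustrative sketch, not part of the source: CONSTRAINT_PARSERS above is
    # keyed by the keyword introducing a column constraint, e.g.:
    #
    #     >>> import sqlglot
    #     >>> ddl = sqlglot.parse_one("CREATE TABLE t (x INT PRIMARY KEY)")
    #     >>> ddl.find(sqlglot.exp.PrimaryKeyColumnConstraint)
    #     PrimaryKeyColumnConstraint()
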
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}
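
    # Illustrative sketch, not part of the source: QUERY_MODIFIER_PARSERS maps a
    # clause-introducing token to the modifier it contributes to the query, e.g.:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT * FROM t LIMIT 5").args["limit"]
    #     Limit(expression=Literal(this=5, is_string=False))
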
    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False
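
    # Illustrative sketch, not part of the source: dialect-specific parsers
    # override the class-level flags above. For example, a hypothetical dialect
    # that accepts string aliases (`SELECT 1 'one'`) would declare:
    #
    #     class MyParser(Parser):
    #         STRING_ALIASES = True
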
"=") for assignments 1106 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1107 1108 # Whether the TRIM function expects the characters to trim as its first argument 1109 TRIM_PATTERN_FIRST = False 1110 1111 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1112 STRING_ALIASES = False 1113 1114 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1115 MODIFIERS_ATTACHED_TO_UNION = True 1116 UNION_MODIFIERS = {"order", "limit", "offset"} 1117 1118 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1119 NO_PAREN_IF_COMMANDS = True 1120 1121 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1122 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1123 1124 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1125 # If this is True and '(' is not found, the keyword will be treated as an identifier 1126 VALUES_FOLLOWED_BY_PAREN = True 1127 1128 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1129 SUPPORTS_IMPLICIT_UNNEST = False 1130 1131 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1132 INTERVAL_SPANS = True 1133 1134 # Whether a PARTITION clause can follow a table reference 1135 SUPPORTS_PARTITION_SELECTION = False 1136 1137 __slots__ = ( 1138 "error_level", 1139 "error_message_context", 1140 "max_errors", 1141 "dialect", 1142 "sql", 1143 "errors", 1144 "_tokens", 1145 "_index", 1146 "_curr", 1147 "_next", 1148 "_prev", 1149 "_prev_comments", 1150 ) 1151 1152 # Autofilled 1153 SHOW_TRIE: t.Dict = {} 1154 SET_TRIE: t.Dict = {} 1155 1156 def __init__( 1157 self, 1158 error_level: t.Optional[ErrorLevel] = None, 1159 error_message_context: int = 100, 1160 max_errors: int = 3, 1161 dialect: DialectType = None, 1162 ): 1163 from sqlglot.dialects import Dialect 1164 1165 self.error_level = error_level or ErrorLevel.IMMEDIATE 1166 self.error_message_context = error_message_context 1167 self.max_errors = max_errors 1168 self.dialect = Dialect.get_or_raise(dialect) 1169 self.reset() 1170 1171 def reset(self): 1172 self.sql = "" 1173 self.errors = [] 1174 self._tokens = [] 1175 self._index = 0 1176 self._curr = None 1177 self._next = None 1178 self._prev = None 1179 self._prev_comments = None 1180 1181 def parse( 1182 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1183 ) -> t.List[t.Optional[exp.Expression]]: 1184 """ 1185 Parses a list of tokens and returns a list of syntax trees, one tree 1186 per parsed SQL statement. 1187 1188 Args: 1189 raw_tokens: The list of tokens. 1190 sql: The original SQL string, used to produce helpful debug messages. 1191 1192 Returns: 1193 The list of the produced syntax trees. 1194 """ 1195 return self._parse( 1196 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1197 ) 1198 1199 def parse_into( 1200 self, 1201 expression_types: exp.IntoType, 1202 raw_tokens: t.List[Token], 1203 sql: t.Optional[str] = None, 1204 ) -> t.List[t.Optional[exp.Expression]]: 1205 """ 1206 Parses a list of tokens into a given Expression type. If a collection of Expression 1207 types is given instead, this method will try to parse the token list into each one 1208 of them, stopping at the first for which the parsing succeeds. 1209 1210 Args: 1211 expression_types: The expression type(s) to try and parse the token list into. 1212 raw_tokens: The list of tokens. 1213 sql: The original SQL string, used to produce helpful debug messages. 
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

        return expressions
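
    # Illustrative usage of parse_into above, not part of the source
    # (schematic repr):
    #
    #     >>> from sqlglot import exp, tokenize
    #     >>> Parser().parse_into(exp.From, tokenize("FROM tbl"))
    #     [From(this=Table(this=Identifier(this=tbl, quoted=False)))]
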
1322 """ 1323 instance = exp_class(**kwargs) 1324 instance.add_comments(comments) if comments else self._add_comments(instance) 1325 return self.validate_expression(instance) 1326 1327 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1328 if expression and self._prev_comments: 1329 expression.add_comments(self._prev_comments) 1330 self._prev_comments = None 1331 1332 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1333 """ 1334 Validates an Expression, making sure that all its mandatory arguments are set. 1335 1336 Args: 1337 expression: The expression to validate. 1338 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1339 1340 Returns: 1341 The validated expression. 1342 """ 1343 if self.error_level != ErrorLevel.IGNORE: 1344 for error_message in expression.error_messages(args): 1345 self.raise_error(error_message) 1346 1347 return expression 1348 1349 def _find_sql(self, start: Token, end: Token) -> str: 1350 return self.sql[start.start : end.end + 1] 1351 1352 def _is_connected(self) -> bool: 1353 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1354 1355 def _advance(self, times: int = 1) -> None: 1356 self._index += times 1357 self._curr = seq_get(self._tokens, self._index) 1358 self._next = seq_get(self._tokens, self._index + 1) 1359 1360 if self._index > 0: 1361 self._prev = self._tokens[self._index - 1] 1362 self._prev_comments = self._prev.comments 1363 else: 1364 self._prev = None 1365 self._prev_comments = None 1366 1367 def _retreat(self, index: int) -> None: 1368 if index != self._index: 1369 self._advance(index - self._index) 1370 1371 def _warn_unsupported(self) -> None: 1372 if len(self._tokens) <= 1: 1373 return 1374 1375 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1376 # interested in emitting a warning for the one being currently processed. 1377 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1378 1379 logger.warning( 1380 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1381 ) 1382 1383 def _parse_command(self) -> exp.Command: 1384 self._warn_unsupported() 1385 return self.expression( 1386 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1387 ) 1388 1389 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1390 """ 1391 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
This behavior can 1392 be different depending on the uset-set ErrorLevel, so _try_parse aims to solve this by setting & resetting 1393 the parser state accordingly 1394 """ 1395 index = self._index 1396 error_level = self.error_level 1397 1398 self.error_level = ErrorLevel.IMMEDIATE 1399 try: 1400 this = parse_method() 1401 except ParseError: 1402 this = None 1403 finally: 1404 if not this or retreat: 1405 self._retreat(index) 1406 self.error_level = error_level 1407 1408 return this 1409 1410 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1411 start = self._prev 1412 exists = self._parse_exists() if allow_exists else None 1413 1414 self._match(TokenType.ON) 1415 1416 materialized = self._match_text_seq("MATERIALIZED") 1417 kind = self._match_set(self.CREATABLES) and self._prev 1418 if not kind: 1419 return self._parse_as_command(start) 1420 1421 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1422 this = self._parse_user_defined_function(kind=kind.token_type) 1423 elif kind.token_type == TokenType.TABLE: 1424 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1425 elif kind.token_type == TokenType.COLUMN: 1426 this = self._parse_column() 1427 else: 1428 this = self._parse_id_var() 1429 1430 self._match(TokenType.IS) 1431 1432 return self.expression( 1433 exp.Comment, 1434 this=this, 1435 kind=kind.text, 1436 expression=self._parse_string(), 1437 exists=exists, 1438 materialized=materialized, 1439 ) 1440 1441 def _parse_to_table( 1442 self, 1443 ) -> exp.ToTableProperty: 1444 table = self._parse_table_parts(schema=True) 1445 return self.expression(exp.ToTableProperty, this=table) 1446 1447 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1448 def _parse_ttl(self) -> exp.Expression: 1449 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1450 this = self._parse_bitwise() 1451 1452 if self._match_text_seq("DELETE"): 1453 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1454 if self._match_text_seq("RECOMPRESS"): 1455 return self.expression( 1456 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1457 ) 1458 if self._match_text_seq("TO", "DISK"): 1459 return self.expression( 1460 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1461 ) 1462 if self._match_text_seq("TO", "VOLUME"): 1463 return self.expression( 1464 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1465 ) 1466 1467 return this 1468 1469 expressions = self._parse_csv(_parse_ttl_action) 1470 where = self._parse_where() 1471 group = self._parse_group() 1472 1473 aggregates = None 1474 if group and self._match(TokenType.SET): 1475 aggregates = self._parse_csv(self._parse_set_item) 1476 1477 return self.expression( 1478 exp.MergeTreeTTL, 1479 expressions=expressions, 1480 where=where, 1481 group=group, 1482 aggregates=aggregates, 1483 ) 1484 1485 def _parse_statement(self) -> t.Optional[exp.Expression]: 1486 if self._curr is None: 1487 return None 1488 1489 if self._match_set(self.STATEMENT_PARSERS): 1490 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1491 1492 if self._match_set(self.dialect.tokenizer.COMMANDS): 1493 return self._parse_command() 1494 1495 expression = self._parse_expression() 1496 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1497 return self._parse_query_modifiers(expression) 1498 1499 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1500 start = 
self._prev 1501 temporary = self._match(TokenType.TEMPORARY) 1502 materialized = self._match_text_seq("MATERIALIZED") 1503 1504 kind = self._match_set(self.CREATABLES) and self._prev.text 1505 if not kind: 1506 return self._parse_as_command(start) 1507 1508 if_exists = exists or self._parse_exists() 1509 table = self._parse_table_parts( 1510 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1511 ) 1512 1513 if self._match(TokenType.L_PAREN, advance=False): 1514 expressions = self._parse_wrapped_csv(self._parse_types) 1515 else: 1516 expressions = None 1517 1518 return self.expression( 1519 exp.Drop, 1520 comments=start.comments, 1521 exists=if_exists, 1522 this=table, 1523 expressions=expressions, 1524 kind=kind.upper(), 1525 temporary=temporary, 1526 materialized=materialized, 1527 cascade=self._match_text_seq("CASCADE"), 1528 constraints=self._match_text_seq("CONSTRAINTS"), 1529 purge=self._match_text_seq("PURGE"), 1530 ) 1531 1532 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1533 return ( 1534 self._match_text_seq("IF") 1535 and (not not_ or self._match(TokenType.NOT)) 1536 and self._match(TokenType.EXISTS) 1537 ) 1538 1539 def _parse_create(self) -> exp.Create | exp.Command: 1540 # Note: this can't be None because we've matched a statement parser 1541 start = self._prev 1542 comments = self._prev_comments 1543 1544 replace = ( 1545 start.token_type == TokenType.REPLACE 1546 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1547 or self._match_pair(TokenType.OR, TokenType.ALTER) 1548 ) 1549 1550 unique = self._match(TokenType.UNIQUE) 1551 1552 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1553 self._advance() 1554 1555 properties = None 1556 create_token = self._match_set(self.CREATABLES) and self._prev 1557 1558 if not create_token: 1559 # exp.Properties.Location.POST_CREATE 1560 properties = self._parse_properties() 1561 create_token = self._match_set(self.CREATABLES) and self._prev 1562 1563 if not properties or not create_token: 1564 return self._parse_as_command(start) 1565 1566 exists = self._parse_exists(not_=True) 1567 this = None 1568 expression: t.Optional[exp.Expression] = None 1569 indexes = None 1570 no_schema_binding = None 1571 begin = None 1572 end = None 1573 clone = None 1574 1575 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1576 nonlocal properties 1577 if properties and temp_props: 1578 properties.expressions.extend(temp_props.expressions) 1579 elif temp_props: 1580 properties = temp_props 1581 1582 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1583 this = self._parse_user_defined_function(kind=create_token.token_type) 1584 1585 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1586 extend_props(self._parse_properties()) 1587 1588 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1589 1590 if not expression: 1591 if self._match(TokenType.COMMAND): 1592 expression = self._parse_as_command(self._prev) 1593 else: 1594 begin = self._match(TokenType.BEGIN) 1595 return_ = self._match_text_seq("RETURN") 1596 1597 if self._match(TokenType.STRING, advance=False): 1598 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1599 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1600 expression = self._parse_string() 1601 extend_props(self._parse_properties()) 1602 else: 1603 expression = self._parse_statement() 1604 
1605 end = self._match_text_seq("END") 1606 1607 if return_: 1608 expression = self.expression(exp.Return, this=expression) 1609 elif create_token.token_type == TokenType.INDEX: 1610 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1611 if not self._match(TokenType.ON): 1612 index = self._parse_id_var() 1613 anonymous = False 1614 else: 1615 index = None 1616 anonymous = True 1617 1618 this = self._parse_index(index=index, anonymous=anonymous) 1619 elif create_token.token_type in self.DB_CREATABLES: 1620 table_parts = self._parse_table_parts( 1621 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1622 ) 1623 1624 # exp.Properties.Location.POST_NAME 1625 self._match(TokenType.COMMA) 1626 extend_props(self._parse_properties(before=True)) 1627 1628 this = self._parse_schema(this=table_parts) 1629 1630 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1631 extend_props(self._parse_properties()) 1632 1633 self._match(TokenType.ALIAS) 1634 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1635 # exp.Properties.Location.POST_ALIAS 1636 extend_props(self._parse_properties()) 1637 1638 if create_token.token_type == TokenType.SEQUENCE: 1639 expression = self._parse_types() 1640 extend_props(self._parse_properties()) 1641 else: 1642 expression = self._parse_ddl_select() 1643 1644 if create_token.token_type == TokenType.TABLE: 1645 # exp.Properties.Location.POST_EXPRESSION 1646 extend_props(self._parse_properties()) 1647 1648 indexes = [] 1649 while True: 1650 index = self._parse_index() 1651 1652 # exp.Properties.Location.POST_INDEX 1653 extend_props(self._parse_properties()) 1654 1655 if not index: 1656 break 1657 else: 1658 self._match(TokenType.COMMA) 1659 indexes.append(index) 1660 elif create_token.token_type == TokenType.VIEW: 1661 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1662 no_schema_binding = True 1663 1664 shallow = self._match_text_seq("SHALLOW") 1665 1666 if self._match_texts(self.CLONE_KEYWORDS): 1667 copy = self._prev.text.lower() == "copy" 1668 clone = self.expression( 1669 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1670 ) 1671 1672 if self._curr: 1673 return self._parse_as_command(start) 1674 1675 return self.expression( 1676 exp.Create, 1677 comments=comments, 1678 this=this, 1679 kind=create_token.text.upper(), 1680 replace=replace, 1681 unique=unique, 1682 expression=expression, 1683 exists=exists, 1684 properties=properties, 1685 indexes=indexes, 1686 no_schema_binding=no_schema_binding, 1687 begin=begin, 1688 end=end, 1689 clone=clone, 1690 ) 1691 1692 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1693 seq = exp.SequenceProperties() 1694 1695 options = [] 1696 index = self._index 1697 1698 while self._curr: 1699 self._match(TokenType.COMMA) 1700 if self._match_text_seq("INCREMENT"): 1701 self._match_text_seq("BY") 1702 self._match_text_seq("=") 1703 seq.set("increment", self._parse_term()) 1704 elif self._match_text_seq("MINVALUE"): 1705 seq.set("minvalue", self._parse_term()) 1706 elif self._match_text_seq("MAXVALUE"): 1707 seq.set("maxvalue", self._parse_term()) 1708 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1709 self._match_text_seq("=") 1710 seq.set("start", self._parse_term()) 1711 elif self._match_text_seq("CACHE"): 1712 # T-SQL allows empty CACHE which is initialized dynamically 1713 seq.set("cache", self._parse_number() or True) 1714 elif self._match_text_seq("OWNED", "BY"): 1715 # "OWNED BY NONE" 
is the default 1716 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1717 else: 1718 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1719 if opt: 1720 options.append(opt) 1721 else: 1722 break 1723 1724 seq.set("options", options if options else None) 1725 return None if self._index == index else seq 1726 1727 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1728 # only used for teradata currently 1729 self._match(TokenType.COMMA) 1730 1731 kwargs = { 1732 "no": self._match_text_seq("NO"), 1733 "dual": self._match_text_seq("DUAL"), 1734 "before": self._match_text_seq("BEFORE"), 1735 "default": self._match_text_seq("DEFAULT"), 1736 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1737 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1738 "after": self._match_text_seq("AFTER"), 1739 "minimum": self._match_texts(("MIN", "MINIMUM")), 1740 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1741 } 1742 1743 if self._match_texts(self.PROPERTY_PARSERS): 1744 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1745 try: 1746 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1747 except TypeError: 1748 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1749 1750 return None 1751 1752 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1753 return self._parse_wrapped_csv(self._parse_property) 1754 1755 def _parse_property(self) -> t.Optional[exp.Expression]: 1756 if self._match_texts(self.PROPERTY_PARSERS): 1757 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1758 1759 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1760 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1761 1762 if self._match_text_seq("COMPOUND", "SORTKEY"): 1763 return self._parse_sortkey(compound=True) 1764 1765 if self._match_text_seq("SQL", "SECURITY"): 1766 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1767 1768 index = self._index 1769 key = self._parse_column() 1770 1771 if not self._match(TokenType.EQ): 1772 self._retreat(index) 1773 return self._parse_sequence_properties() 1774 1775 return self.expression( 1776 exp.Property, 1777 this=key.to_dot() if isinstance(key, exp.Column) else key, 1778 value=self._parse_bitwise() or self._parse_var(any_token=True), 1779 ) 1780 1781 def _parse_stored(self) -> exp.FileFormatProperty: 1782 self._match(TokenType.ALIAS) 1783 1784 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1785 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1786 1787 return self.expression( 1788 exp.FileFormatProperty, 1789 this=( 1790 self.expression( 1791 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1792 ) 1793 if input_format or output_format 1794 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1795 ), 1796 ) 1797 1798 def _parse_unquoted_field(self): 1799 field = self._parse_field() 1800 if isinstance(field, exp.Identifier) and not field.quoted: 1801 field = exp.var(field) 1802 1803 return field 1804 1805 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1806 self._match(TokenType.EQ) 1807 self._match(TokenType.ALIAS) 1808 1809 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1810 1811 def _parse_properties(self, before: t.Optional[bool] = None) -> 
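# --------------------------------------------------------------------------
# Illustrative aside (not part of the module): CREATE SEQUENCE statements are
# routed through _parse_sequence_properties above, so the options land on an
# exp.SequenceProperties node inside the exp.Create tree. A minimal sketch
# using the public API; exact argument layouts can vary between versions:
#
#     import sqlglot
#     from sqlglot import exp
#
#     ast = sqlglot.parse_one("CREATE SEQUENCE s START WITH 1 INCREMENT BY 2")
#     assert ast.args["kind"] == "SEQUENCE"
#     seq = ast.find(exp.SequenceProperties)
#     # seq.args["start"] and seq.args["increment"] hold the parsed terms
# --------------------------------------------------------------------------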
t.Optional[exp.Properties]: 1812 properties = [] 1813 while True: 1814 if before: 1815 prop = self._parse_property_before() 1816 else: 1817 prop = self._parse_property() 1818 if not prop: 1819 break 1820 for p in ensure_list(prop): 1821 properties.append(p) 1822 1823 if properties: 1824 return self.expression(exp.Properties, expressions=properties) 1825 1826 return None 1827 1828 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1829 return self.expression( 1830 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1831 ) 1832 1833 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1834 if self._index >= 2: 1835 pre_volatile_token = self._tokens[self._index - 2] 1836 else: 1837 pre_volatile_token = None 1838 1839 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1840 return exp.VolatileProperty() 1841 1842 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1843 1844 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1845 self._match_pair(TokenType.EQ, TokenType.ON) 1846 1847 prop = self.expression(exp.WithSystemVersioningProperty) 1848 if self._match(TokenType.L_PAREN): 1849 self._match_text_seq("HISTORY_TABLE", "=") 1850 prop.set("this", self._parse_table_parts()) 1851 1852 if self._match(TokenType.COMMA): 1853 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1854 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1855 1856 self._match_r_paren() 1857 1858 return prop 1859 1860 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1861 if self._match(TokenType.L_PAREN, advance=False): 1862 return self._parse_wrapped_properties() 1863 1864 if self._match_text_seq("JOURNAL"): 1865 return self._parse_withjournaltable() 1866 1867 if self._match_texts(self.VIEW_ATTRIBUTES): 1868 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1869 1870 if self._match_text_seq("DATA"): 1871 return self._parse_withdata(no=False) 1872 elif self._match_text_seq("NO", "DATA"): 1873 return self._parse_withdata(no=True) 1874 1875 if not self._next: 1876 return None 1877 1878 return self._parse_withisolatedloading() 1879 1880 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1881 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1882 self._match(TokenType.EQ) 1883 1884 user = self._parse_id_var() 1885 self._match(TokenType.PARAMETER) 1886 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1887 1888 if not user or not host: 1889 return None 1890 1891 return exp.DefinerProperty(this=f"{user}@{host}") 1892 1893 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1894 self._match(TokenType.TABLE) 1895 self._match(TokenType.EQ) 1896 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1897 1898 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1899 return self.expression(exp.LogProperty, no=no) 1900 1901 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1902 return self.expression(exp.JournalProperty, **kwargs) 1903 1904 def _parse_checksum(self) -> exp.ChecksumProperty: 1905 self._match(TokenType.EQ) 1906 1907 on = None 1908 if self._match(TokenType.ON): 1909 on = True 1910 elif self._match_text_seq("OFF"): 1911 on = False 1912 1913 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1914 1915 def _parse_cluster(self, 
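# --------------------------------------------------------------------------
# Illustrative aside (not part of the module): _parse_definer above flattens
# the MySQL user@host pair into a single string. A minimal sketch, assuming
# the public sqlglot.parse_one entry point:
#
#     import sqlglot
#     from sqlglot import exp
#
#     ast = sqlglot.parse_one(
#         "CREATE DEFINER=admin@localhost VIEW v AS SELECT 1", read="mysql"
#     )
#     prop = ast.find(exp.DefinerProperty)
#     # prop.this should be the string "admin@localhost"
# --------------------------------------------------------------------------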
wrapped: bool = False) -> exp.Cluster: 1916 return self.expression( 1917 exp.Cluster, 1918 expressions=( 1919 self._parse_wrapped_csv(self._parse_ordered) 1920 if wrapped 1921 else self._parse_csv(self._parse_ordered) 1922 ), 1923 ) 1924 1925 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1926 self._match_text_seq("BY") 1927 1928 self._match_l_paren() 1929 expressions = self._parse_csv(self._parse_column) 1930 self._match_r_paren() 1931 1932 if self._match_text_seq("SORTED", "BY"): 1933 self._match_l_paren() 1934 sorted_by = self._parse_csv(self._parse_ordered) 1935 self._match_r_paren() 1936 else: 1937 sorted_by = None 1938 1939 self._match(TokenType.INTO) 1940 buckets = self._parse_number() 1941 self._match_text_seq("BUCKETS") 1942 1943 return self.expression( 1944 exp.ClusteredByProperty, 1945 expressions=expressions, 1946 sorted_by=sorted_by, 1947 buckets=buckets, 1948 ) 1949 1950 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1951 if not self._match_text_seq("GRANTS"): 1952 self._retreat(self._index - 1) 1953 return None 1954 1955 return self.expression(exp.CopyGrantsProperty) 1956 1957 def _parse_freespace(self) -> exp.FreespaceProperty: 1958 self._match(TokenType.EQ) 1959 return self.expression( 1960 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1961 ) 1962 1963 def _parse_mergeblockratio( 1964 self, no: bool = False, default: bool = False 1965 ) -> exp.MergeBlockRatioProperty: 1966 if self._match(TokenType.EQ): 1967 return self.expression( 1968 exp.MergeBlockRatioProperty, 1969 this=self._parse_number(), 1970 percent=self._match(TokenType.PERCENT), 1971 ) 1972 1973 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1974 1975 def _parse_datablocksize( 1976 self, 1977 default: t.Optional[bool] = None, 1978 minimum: t.Optional[bool] = None, 1979 maximum: t.Optional[bool] = None, 1980 ) -> exp.DataBlocksizeProperty: 1981 self._match(TokenType.EQ) 1982 size = self._parse_number() 1983 1984 units = None 1985 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1986 units = self._prev.text 1987 1988 return self.expression( 1989 exp.DataBlocksizeProperty, 1990 size=size, 1991 units=units, 1992 default=default, 1993 minimum=minimum, 1994 maximum=maximum, 1995 ) 1996 1997 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1998 self._match(TokenType.EQ) 1999 always = self._match_text_seq("ALWAYS") 2000 manual = self._match_text_seq("MANUAL") 2001 never = self._match_text_seq("NEVER") 2002 default = self._match_text_seq("DEFAULT") 2003 2004 autotemp = None 2005 if self._match_text_seq("AUTOTEMP"): 2006 autotemp = self._parse_schema() 2007 2008 return self.expression( 2009 exp.BlockCompressionProperty, 2010 always=always, 2011 manual=manual, 2012 never=never, 2013 default=default, 2014 autotemp=autotemp, 2015 ) 2016 2017 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2018 index = self._index 2019 no = self._match_text_seq("NO") 2020 concurrent = self._match_text_seq("CONCURRENT") 2021 2022 if not self._match_text_seq("ISOLATED", "LOADING"): 2023 self._retreat(index) 2024 return None 2025 2026 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2027 return self.expression( 2028 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2029 ) 2030 2031 def _parse_locking(self) -> exp.LockingProperty: 2032 if self._match(TokenType.TABLE): 2033 kind = "TABLE" 2034 elif 
self._match(TokenType.VIEW): 2035 kind = "VIEW" 2036 elif self._match(TokenType.ROW): 2037 kind = "ROW" 2038 elif self._match_text_seq("DATABASE"): 2039 kind = "DATABASE" 2040 else: 2041 kind = None 2042 2043 if kind in ("DATABASE", "TABLE", "VIEW"): 2044 this = self._parse_table_parts() 2045 else: 2046 this = None 2047 2048 if self._match(TokenType.FOR): 2049 for_or_in = "FOR" 2050 elif self._match(TokenType.IN): 2051 for_or_in = "IN" 2052 else: 2053 for_or_in = None 2054 2055 if self._match_text_seq("ACCESS"): 2056 lock_type = "ACCESS" 2057 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2058 lock_type = "EXCLUSIVE" 2059 elif self._match_text_seq("SHARE"): 2060 lock_type = "SHARE" 2061 elif self._match_text_seq("READ"): 2062 lock_type = "READ" 2063 elif self._match_text_seq("WRITE"): 2064 lock_type = "WRITE" 2065 elif self._match_text_seq("CHECKSUM"): 2066 lock_type = "CHECKSUM" 2067 else: 2068 lock_type = None 2069 2070 override = self._match_text_seq("OVERRIDE") 2071 2072 return self.expression( 2073 exp.LockingProperty, 2074 this=this, 2075 kind=kind, 2076 for_or_in=for_or_in, 2077 lock_type=lock_type, 2078 override=override, 2079 ) 2080 2081 def _parse_partition_by(self) -> t.List[exp.Expression]: 2082 if self._match(TokenType.PARTITION_BY): 2083 return self._parse_csv(self._parse_conjunction) 2084 return [] 2085 2086 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2087 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2088 if self._match_text_seq("MINVALUE"): 2089 return exp.var("MINVALUE") 2090 if self._match_text_seq("MAXVALUE"): 2091 return exp.var("MAXVALUE") 2092 return self._parse_bitwise() 2093 2094 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2095 expression = None 2096 from_expressions = None 2097 to_expressions = None 2098 2099 if self._match(TokenType.IN): 2100 this = self._parse_wrapped_csv(self._parse_bitwise) 2101 elif self._match(TokenType.FROM): 2102 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2103 self._match_text_seq("TO") 2104 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2105 elif self._match_text_seq("WITH", "(", "MODULUS"): 2106 this = self._parse_number() 2107 self._match_text_seq(",", "REMAINDER") 2108 expression = self._parse_number() 2109 self._match_r_paren() 2110 else: 2111 self.raise_error("Failed to parse partition bound spec.") 2112 2113 return self.expression( 2114 exp.PartitionBoundSpec, 2115 this=this, 2116 expression=expression, 2117 from_expressions=from_expressions, 2118 to_expressions=to_expressions, 2119 ) 2120 2121 # https://www.postgresql.org/docs/current/sql-createtable.html 2122 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2123 if not self._match_text_seq("OF"): 2124 self._retreat(self._index - 1) 2125 return None 2126 2127 this = self._parse_table(schema=True) 2128 2129 if self._match(TokenType.DEFAULT): 2130 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2131 elif self._match_text_seq("FOR", "VALUES"): 2132 expression = self._parse_partition_bound_spec() 2133 else: 2134 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2135 2136 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2137 2138 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2139 self._match(TokenType.EQ) 2140 return self.expression( 2141 exp.PartitionedByProperty, 2142 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2143 ) 2144 2145 def 
_parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2146 if self._match_text_seq("AND", "STATISTICS"): 2147 statistics = True 2148 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2149 statistics = False 2150 else: 2151 statistics = None 2152 2153 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2154 2155 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2156 if self._match_text_seq("SQL"): 2157 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2158 return None 2159 2160 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2161 if self._match_text_seq("SQL", "DATA"): 2162 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2163 return None 2164 2165 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2166 if self._match_text_seq("PRIMARY", "INDEX"): 2167 return exp.NoPrimaryIndexProperty() 2168 if self._match_text_seq("SQL"): 2169 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2170 return None 2171 2172 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2173 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2174 return exp.OnCommitProperty() 2175 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2176 return exp.OnCommitProperty(delete=True) 2177 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2178 2179 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2180 if self._match_text_seq("SQL", "DATA"): 2181 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2182 return None 2183 2184 def _parse_distkey(self) -> exp.DistKeyProperty: 2185 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2186 2187 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2188 table = self._parse_table(schema=True) 2189 2190 options = [] 2191 while self._match_texts(("INCLUDING", "EXCLUDING")): 2192 this = self._prev.text.upper() 2193 2194 id_var = self._parse_id_var() 2195 if not id_var: 2196 return None 2197 2198 options.append( 2199 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2200 ) 2201 2202 return self.expression(exp.LikeProperty, this=table, expressions=options) 2203 2204 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2205 return self.expression( 2206 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2207 ) 2208 2209 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2210 self._match(TokenType.EQ) 2211 return self.expression( 2212 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2213 ) 2214 2215 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2216 self._match_text_seq("WITH", "CONNECTION") 2217 return self.expression( 2218 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2219 ) 2220 2221 def _parse_returns(self) -> exp.ReturnsProperty: 2222 value: t.Optional[exp.Expression] 2223 is_table = self._match(TokenType.TABLE) 2224 2225 if is_table: 2226 if self._match(TokenType.LT): 2227 value = self.expression( 2228 exp.Schema, 2229 this="TABLE", 2230 expressions=self._parse_csv(self._parse_struct_types), 2231 ) 2232 if not self._match(TokenType.GT): 2233 self.raise_error("Expecting >") 2234 else: 2235 value = self._parse_schema(exp.var("TABLE")) 2236 else: 2237 value = self._parse_types() 2238 2239 return 
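# --------------------------------------------------------------------------
# Illustrative aside (not part of the module): _parse_partitioned_of and
# _parse_partition_bound_spec above cover Postgres declarative partitioning.
# A minimal sketch; the exact node layout may differ across versions:
#
#     import sqlglot
#     from sqlglot import exp
#
#     ast = sqlglot.parse_one(
#         "CREATE TABLE p PARTITION OF t FOR VALUES FROM (1) TO (10)",
#         read="postgres",
#     )
#     spec = ast.find(exp.PartitionBoundSpec)
#     # spec.args["from_expressions"] / spec.args["to_expressions"] hold the
#     # bounds; MINVALUE/MAXVALUE come back as exp.var(...) placeholders
# --------------------------------------------------------------------------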
self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 2240 2241 def _parse_describe(self) -> exp.Describe: 2242 kind = self._match_set(self.CREATABLES) and self._prev.text 2243 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2244 if self._match(TokenType.DOT): 2245 style = None 2246 self._retreat(self._index - 2) 2247 this = self._parse_table(schema=True) 2248 properties = self._parse_properties() 2249 expressions = properties.expressions if properties else None 2250 return self.expression( 2251 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2252 ) 2253 2254 def _parse_insert(self) -> exp.Insert: 2255 comments = ensure_list(self._prev_comments) 2256 hint = self._parse_hint() 2257 overwrite = self._match(TokenType.OVERWRITE) 2258 ignore = self._match(TokenType.IGNORE) 2259 local = self._match_text_seq("LOCAL") 2260 alternative = None 2261 is_function = None 2262 2263 if self._match_text_seq("DIRECTORY"): 2264 this: t.Optional[exp.Expression] = self.expression( 2265 exp.Directory, 2266 this=self._parse_var_or_string(), 2267 local=local, 2268 row_format=self._parse_row_format(match_row=True), 2269 ) 2270 else: 2271 if self._match(TokenType.OR): 2272 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2273 2274 self._match(TokenType.INTO) 2275 comments += ensure_list(self._prev_comments) 2276 self._match(TokenType.TABLE) 2277 is_function = self._match(TokenType.FUNCTION) 2278 2279 this = ( 2280 self._parse_table(schema=True, parse_partition=True) 2281 if not is_function 2282 else self._parse_function() 2283 ) 2284 2285 returning = self._parse_returning() 2286 2287 return self.expression( 2288 exp.Insert, 2289 comments=comments, 2290 hint=hint, 2291 is_function=is_function, 2292 this=this, 2293 stored=self._match_text_seq("STORED") and self._parse_stored(), 2294 by_name=self._match_text_seq("BY", "NAME"), 2295 exists=self._parse_exists(), 2296 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2297 and self._parse_conjunction(), 2298 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2299 conflict=self._parse_on_conflict(), 2300 returning=returning or self._parse_returning(), 2301 overwrite=overwrite, 2302 alternative=alternative, 2303 ignore=ignore, 2304 ) 2305 2306 def _parse_kill(self) -> exp.Kill: 2307 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2308 2309 return self.expression( 2310 exp.Kill, 2311 this=self._parse_primary(), 2312 kind=kind, 2313 ) 2314 2315 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2316 conflict = self._match_text_seq("ON", "CONFLICT") 2317 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2318 2319 if not conflict and not duplicate: 2320 return None 2321 2322 conflict_keys = None 2323 constraint = None 2324 2325 if conflict: 2326 if self._match_text_seq("ON", "CONSTRAINT"): 2327 constraint = self._parse_id_var() 2328 elif self._match(TokenType.L_PAREN): 2329 conflict_keys = self._parse_csv(self._parse_id_var) 2330 self._match_r_paren() 2331 2332 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2333 if self._prev.token_type == TokenType.UPDATE: 2334 self._match(TokenType.SET) 2335 expressions = self._parse_csv(self._parse_equality) 2336 else: 2337 expressions = None 2338 2339 return self.expression( 2340 exp.OnConflict, 2341 duplicate=duplicate, 2342 expressions=expressions, 2343 action=action, 2344 conflict_keys=conflict_keys, 2345 
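# --------------------------------------------------------------------------
# Illustrative aside (not part of the module): _parse_on_conflict handles
# both Postgres ON CONFLICT and MySQL ON DUPLICATE KEY. A hedged sketch
# using the public API; the action/keys layout may vary by version:
#
#     import sqlglot
#
#     ast = sqlglot.parse_one(
#         "INSERT INTO t (a) VALUES (1) ON CONFLICT (a) DO NOTHING",
#         read="postgres",
#     )
#     conflict = ast.args["conflict"]  # exp.OnConflict
#     # conflict.args["conflict_keys"] lists the target columns
# --------------------------------------------------------------------------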
constraint=constraint, 2346 ) 2347 2348 def _parse_returning(self) -> t.Optional[exp.Returning]: 2349 if not self._match(TokenType.RETURNING): 2350 return None 2351 return self.expression( 2352 exp.Returning, 2353 expressions=self._parse_csv(self._parse_expression), 2354 into=self._match(TokenType.INTO) and self._parse_table_part(), 2355 ) 2356 2357 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2358 if not self._match(TokenType.FORMAT): 2359 return None 2360 return self._parse_row_format() 2361 2362 def _parse_row_format( 2363 self, match_row: bool = False 2364 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2365 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2366 return None 2367 2368 if self._match_text_seq("SERDE"): 2369 this = self._parse_string() 2370 2371 serde_properties = None 2372 if self._match(TokenType.SERDE_PROPERTIES): 2373 serde_properties = self.expression( 2374 exp.SerdeProperties, expressions=self._parse_wrapped_properties() 2375 ) 2376 2377 return self.expression( 2378 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2379 ) 2380 2381 self._match_text_seq("DELIMITED") 2382 2383 kwargs = {} 2384 2385 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2386 kwargs["fields"] = self._parse_string() 2387 if self._match_text_seq("ESCAPED", "BY"): 2388 kwargs["escaped"] = self._parse_string() 2389 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2390 kwargs["collection_items"] = self._parse_string() 2391 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2392 kwargs["map_keys"] = self._parse_string() 2393 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2394 kwargs["lines"] = self._parse_string() 2395 if self._match_text_seq("NULL", "DEFINED", "AS"): 2396 kwargs["null"] = self._parse_string() 2397 2398 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2399 2400 def _parse_load(self) -> exp.LoadData | exp.Command: 2401 if self._match_text_seq("DATA"): 2402 local = self._match_text_seq("LOCAL") 2403 self._match_text_seq("INPATH") 2404 inpath = self._parse_string() 2405 overwrite = self._match(TokenType.OVERWRITE) 2406 self._match_pair(TokenType.INTO, TokenType.TABLE) 2407 2408 return self.expression( 2409 exp.LoadData, 2410 this=self._parse_table(schema=True), 2411 local=local, 2412 overwrite=overwrite, 2413 inpath=inpath, 2414 partition=self._parse_partition(), 2415 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2416 serde=self._match_text_seq("SERDE") and self._parse_string(), 2417 ) 2418 return self._parse_as_command(self._prev) 2419 2420 def _parse_delete(self) -> exp.Delete: 2421 # This handles MySQL's "Multiple-Table Syntax" 2422 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2423 tables = None 2424 comments = self._prev_comments 2425 if not self._match(TokenType.FROM, advance=False): 2426 tables = self._parse_csv(self._parse_table) or None 2427 2428 returning = self._parse_returning() 2429 2430 return self.expression( 2431 exp.Delete, 2432 comments=comments, 2433 tables=tables, 2434 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2435 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2436 where=self._parse_where(), 2437 returning=returning or self._parse_returning(), 2438 limit=self._parse_limit(), 2439 ) 2440 2441 def _parse_update(self) -> exp.Update: 2442 comments = self._prev_comments 2443 this = 
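# --------------------------------------------------------------------------
# Illustrative aside (not part of the module): _parse_delete accepts the
# MySQL multiple-table form (tables before FROM) as well as plain DELETE,
# and the two _parse_returning calls pick up RETURNING wherever the dialect
# places it. A minimal sketch:
#
#     import sqlglot
#
#     ast = sqlglot.parse_one(
#         "DELETE FROM t WHERE a = 1 RETURNING *", read="postgres"
#     )
#     # ast.args["where"] -> exp.Where, ast.args["returning"] -> exp.Returning
# --------------------------------------------------------------------------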
self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2444 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2445 returning = self._parse_returning() 2446 return self.expression( 2447 exp.Update, 2448 comments=comments, 2449 **{ # type: ignore 2450 "this": this, 2451 "expressions": expressions, 2452 "from": self._parse_from(joins=True), 2453 "where": self._parse_where(), 2454 "returning": returning or self._parse_returning(), 2455 "order": self._parse_order(), 2456 "limit": self._parse_limit(), 2457 }, 2458 ) 2459 2460 def _parse_uncache(self) -> exp.Uncache: 2461 if not self._match(TokenType.TABLE): 2462 self.raise_error("Expecting TABLE after UNCACHE") 2463 2464 return self.expression( 2465 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2466 ) 2467 2468 def _parse_cache(self) -> exp.Cache: 2469 lazy = self._match_text_seq("LAZY") 2470 self._match(TokenType.TABLE) 2471 table = self._parse_table(schema=True) 2472 2473 options = [] 2474 if self._match_text_seq("OPTIONS"): 2475 self._match_l_paren() 2476 k = self._parse_string() 2477 self._match(TokenType.EQ) 2478 v = self._parse_string() 2479 options = [k, v] 2480 self._match_r_paren() 2481 2482 self._match(TokenType.ALIAS) 2483 return self.expression( 2484 exp.Cache, 2485 this=table, 2486 lazy=lazy, 2487 options=options, 2488 expression=self._parse_select(nested=True), 2489 ) 2490 2491 def _parse_partition(self) -> t.Optional[exp.Partition]: 2492 if not self._match(TokenType.PARTITION): 2493 return None 2494 2495 return self.expression( 2496 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2497 ) 2498 2499 def _parse_value(self) -> t.Optional[exp.Tuple]: 2500 if self._match(TokenType.L_PAREN): 2501 expressions = self._parse_csv(self._parse_expression) 2502 self._match_r_paren() 2503 return self.expression(exp.Tuple, expressions=expressions) 2504 2505 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
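# --------------------------------------------------------------------------
# Illustrative aside (not part of the module): the kwargs dict built in
# _parse_update maps one-to-one onto exp.Update's args:
#
#     import sqlglot
#
#     ast = sqlglot.parse_one("UPDATE t SET a = 1, b = 2 WHERE c = 3")
#     # ast.args["expressions"] is the SET list (exp.EQ nodes) and
#     # ast.args["where"] is the WHERE clause
# --------------------------------------------------------------------------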
2506 expression = self._parse_expression() 2507 if expression: 2508 return self.expression(exp.Tuple, expressions=[expression]) 2509 return None 2510 2511 def _parse_projections(self) -> t.List[exp.Expression]: 2512 return self._parse_expressions() 2513 2514 def _parse_select( 2515 self, 2516 nested: bool = False, 2517 table: bool = False, 2518 parse_subquery_alias: bool = True, 2519 parse_set_operation: bool = True, 2520 ) -> t.Optional[exp.Expression]: 2521 cte = self._parse_with() 2522 2523 if cte: 2524 this = self._parse_statement() 2525 2526 if not this: 2527 self.raise_error("Failed to parse any statement following CTE") 2528 return cte 2529 2530 if "with" in this.arg_types: 2531 this.set("with", cte) 2532 else: 2533 self.raise_error(f"{this.key} does not support CTE") 2534 this = cte 2535 2536 return this 2537 2538 # duckdb supports leading with FROM x 2539 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2540 2541 if self._match(TokenType.SELECT): 2542 comments = self._prev_comments 2543 2544 hint = self._parse_hint() 2545 all_ = self._match(TokenType.ALL) 2546 distinct = self._match_set(self.DISTINCT_TOKENS) 2547 2548 kind = ( 2549 self._match(TokenType.ALIAS) 2550 and self._match_texts(("STRUCT", "VALUE")) 2551 and self._prev.text.upper() 2552 ) 2553 2554 if distinct: 2555 distinct = self.expression( 2556 exp.Distinct, 2557 on=self._parse_value() if self._match(TokenType.ON) else None, 2558 ) 2559 2560 if all_ and distinct: 2561 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2562 2563 limit = self._parse_limit(top=True) 2564 projections = self._parse_projections() 2565 2566 this = self.expression( 2567 exp.Select, 2568 kind=kind, 2569 hint=hint, 2570 distinct=distinct, 2571 expressions=projections, 2572 limit=limit, 2573 ) 2574 this.comments = comments 2575 2576 into = self._parse_into() 2577 if into: 2578 this.set("into", into) 2579 2580 if not from_: 2581 from_ = self._parse_from() 2582 2583 if from_: 2584 this.set("from", from_) 2585 2586 this = self._parse_query_modifiers(this) 2587 elif (table or nested) and self._match(TokenType.L_PAREN): 2588 if self._match(TokenType.PIVOT): 2589 this = self._parse_simplified_pivot() 2590 elif self._match(TokenType.FROM): 2591 this = exp.select("*").from_( 2592 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2593 ) 2594 else: 2595 this = ( 2596 self._parse_table() 2597 if table 2598 else self._parse_select(nested=True, parse_set_operation=False) 2599 ) 2600 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2601 2602 self._match_r_paren() 2603 2604 # We return early here so that the UNION isn't attached to the subquery by the 2605 # following call to _parse_set_operations, but instead becomes the parent node 2606 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2607 elif self._match(TokenType.VALUES, advance=False): 2608 this = self._parse_derived_table_values() 2609 elif from_: 2610 this = exp.select("*").from_(from_.this, copy=False) 2611 else: 2612 this = None 2613 2614 if parse_set_operation: 2615 return self._parse_set_operations(this) 2616 return this 2617 2618 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2619 if not skip_with_token and not self._match(TokenType.WITH): 2620 return None 2621 2622 comments = self._prev_comments 2623 recursive = self._match(TokenType.RECURSIVE) 2624 2625 expressions = [] 2626 while True: 2627 expressions.append(self._parse_cte()) 2628 2629 if not 
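# --------------------------------------------------------------------------
# Illustrative aside (not part of the module): the `elif from_` fallback at
# the end of _parse_select is what makes DuckDB's leading/bare FROM work.
# A minimal sketch; output formatting may differ slightly by version:
#
#     import sqlglot
#
#     sqlglot.parse_one("FROM tbl", read="duckdb").sql()
#     # -> 'SELECT * FROM tbl'
# --------------------------------------------------------------------------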
self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2630 break 2631 else: 2632 self._match(TokenType.WITH) 2633 2634 return self.expression( 2635 exp.With, comments=comments, expressions=expressions, recursive=recursive 2636 ) 2637 2638 def _parse_cte(self) -> exp.CTE: 2639 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2640 if not alias or not alias.this: 2641 self.raise_error("Expected CTE to have alias") 2642 2643 self._match(TokenType.ALIAS) 2644 2645 if self._match_text_seq("NOT", "MATERIALIZED"): 2646 materialized = False 2647 elif self._match_text_seq("MATERIALIZED"): 2648 materialized = True 2649 else: 2650 materialized = None 2651 2652 return self.expression( 2653 exp.CTE, 2654 this=self._parse_wrapped(self._parse_statement), 2655 alias=alias, 2656 materialized=materialized, 2657 ) 2658 2659 def _parse_table_alias( 2660 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2661 ) -> t.Optional[exp.TableAlias]: 2662 any_token = self._match(TokenType.ALIAS) 2663 alias = ( 2664 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2665 or self._parse_string_as_identifier() 2666 ) 2667 2668 index = self._index 2669 if self._match(TokenType.L_PAREN): 2670 columns = self._parse_csv(self._parse_function_parameter) 2671 self._match_r_paren() if columns else self._retreat(index) 2672 else: 2673 columns = None 2674 2675 if not alias and not columns: 2676 return None 2677 2678 return self.expression(exp.TableAlias, this=alias, columns=columns) 2679 2680 def _parse_subquery( 2681 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2682 ) -> t.Optional[exp.Subquery]: 2683 if not this: 2684 return None 2685 2686 return self.expression( 2687 exp.Subquery, 2688 this=this, 2689 pivots=self._parse_pivots(), 2690 alias=self._parse_table_alias() if parse_alias else None, 2691 ) 2692 2693 def _implicit_unnests_to_explicit(self, this: E) -> E: 2694 from sqlglot.optimizer.normalize_identifiers import ( 2695 normalize_identifiers as _norm, 2696 ) 2697 2698 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2699 for i, join in enumerate(this.args.get("joins") or []): 2700 table = join.this 2701 normalized_table = table.copy() 2702 normalized_table.meta["maybe_column"] = True 2703 normalized_table = _norm(normalized_table, dialect=self.dialect) 2704 2705 if isinstance(table, exp.Table) and not join.args.get("on"): 2706 if normalized_table.parts[0].name in refs: 2707 table_as_column = table.to_column() 2708 unnest = exp.Unnest(expressions=[table_as_column]) 2709 2710 # Table.to_column creates a parent Alias node that we want to convert to 2711 # a TableAlias and attach to the Unnest, so it matches the parser's output 2712 if isinstance(table.args.get("alias"), exp.TableAlias): 2713 table_as_column.replace(table_as_column.this) 2714 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2715 2716 table.replace(unnest) 2717 2718 refs.add(normalized_table.alias_or_name) 2719 2720 return this 2721 2722 def _parse_query_modifiers( 2723 self, this: t.Optional[exp.Expression] 2724 ) -> t.Optional[exp.Expression]: 2725 if isinstance(this, (exp.Query, exp.Table)): 2726 for join in self._parse_joins(): 2727 this.append("joins", join) 2728 for lateral in iter(self._parse_lateral, None): 2729 this.append("laterals", lateral) 2730 2731 while True: 2732 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2733 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2734 key, expression 
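# --------------------------------------------------------------------------
# Illustrative aside (not part of the module): _parse_cte records the
# (NOT) MATERIALIZED hint as a tri-state flag:
#
#     import sqlglot
#     from sqlglot import exp
#
#     cte = sqlglot.parse_one(
#         "WITH c AS MATERIALIZED (SELECT 1) SELECT * FROM c", read="postgres"
#     ).find(exp.CTE)
#     # cte.args["materialized"] is True; NOT MATERIALIZED yields False and
#     # an unadorned CTE leaves the arg unset
# --------------------------------------------------------------------------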
= parser(self) 2735 2736 if expression: 2737 this.set(key, expression) 2738 if key == "limit": 2739 offset = expression.args.pop("offset", None) 2740 2741 if offset: 2742 offset = exp.Offset(expression=offset) 2743 this.set("offset", offset) 2744 2745 limit_by_expressions = expression.expressions 2746 expression.set("expressions", None) 2747 offset.set("expressions", limit_by_expressions) 2748 continue 2749 break 2750 2751 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2752 this = self._implicit_unnests_to_explicit(this) 2753 2754 return this 2755 2756 def _parse_hint(self) -> t.Optional[exp.Hint]: 2757 if self._match(TokenType.HINT): 2758 hints = [] 2759 for hint in iter( 2760 lambda: self._parse_csv( 2761 lambda: self._parse_function() or self._parse_var(upper=True) 2762 ), 2763 [], 2764 ): 2765 hints.extend(hint) 2766 2767 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2768 self.raise_error("Expected */ after HINT") 2769 2770 return self.expression(exp.Hint, expressions=hints) 2771 2772 return None 2773 2774 def _parse_into(self) -> t.Optional[exp.Into]: 2775 if not self._match(TokenType.INTO): 2776 return None 2777 2778 temp = self._match(TokenType.TEMPORARY) 2779 unlogged = self._match_text_seq("UNLOGGED") 2780 self._match(TokenType.TABLE) 2781 2782 return self.expression( 2783 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2784 ) 2785 2786 def _parse_from( 2787 self, joins: bool = False, skip_from_token: bool = False 2788 ) -> t.Optional[exp.From]: 2789 if not skip_from_token and not self._match(TokenType.FROM): 2790 return None 2791 2792 return self.expression( 2793 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2794 ) 2795 2796 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2797 return self.expression( 2798 exp.MatchRecognizeMeasure, 2799 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2800 this=self._parse_expression(), 2801 ) 2802 2803 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2804 if not self._match(TokenType.MATCH_RECOGNIZE): 2805 return None 2806 2807 self._match_l_paren() 2808 2809 partition = self._parse_partition_by() 2810 order = self._parse_order() 2811 2812 measures = ( 2813 self._parse_csv(self._parse_match_recognize_measure) 2814 if self._match_text_seq("MEASURES") 2815 else None 2816 ) 2817 2818 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2819 rows = exp.var("ONE ROW PER MATCH") 2820 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2821 text = "ALL ROWS PER MATCH" 2822 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2823 text += " SHOW EMPTY MATCHES" 2824 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2825 text += " OMIT EMPTY MATCHES" 2826 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2827 text += " WITH UNMATCHED ROWS" 2828 rows = exp.var(text) 2829 else: 2830 rows = None 2831 2832 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2833 text = "AFTER MATCH SKIP" 2834 if self._match_text_seq("PAST", "LAST", "ROW"): 2835 text += " PAST LAST ROW" 2836 elif self._match_text_seq("TO", "NEXT", "ROW"): 2837 text += " TO NEXT ROW" 2838 elif self._match_text_seq("TO", "FIRST"): 2839 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2840 elif self._match_text_seq("TO", "LAST"): 2841 text += f" TO LAST {self._advance_any().text}" # type: ignore 2842 after = exp.var(text) 2843 else: 2844 after = None 2845 2846 if self._match_text_seq("PATTERN"): 
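# --------------------------------------------------------------------------
# Illustrative aside (not part of the module): _parse_hint only fires in
# dialects whose tokenizer emits TokenType.HINT for "/*+" (Oracle, for one).
# A hedged sketch; hint support varies by dialect and version:
#
#     import sqlglot
#
#     ast = sqlglot.parse_one("SELECT /*+ FULL(t) */ a FROM t", read="oracle")
#     # ast.args["hint"] -> exp.Hint(expressions=[...])
# --------------------------------------------------------------------------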
2847 self._match_l_paren() 2848 2849 if not self._curr: 2850 self.raise_error("Expecting )", self._curr) 2851 2852 paren = 1 2853 start = self._curr 2854 2855 while self._curr and paren > 0: 2856 if self._curr.token_type == TokenType.L_PAREN: 2857 paren += 1 2858 if self._curr.token_type == TokenType.R_PAREN: 2859 paren -= 1 2860 2861 end = self._prev 2862 self._advance() 2863 2864 if paren > 0: 2865 self.raise_error("Expecting )", self._curr) 2866 2867 pattern = exp.var(self._find_sql(start, end)) 2868 else: 2869 pattern = None 2870 2871 define = ( 2872 self._parse_csv(self._parse_name_as_expression) 2873 if self._match_text_seq("DEFINE") 2874 else None 2875 ) 2876 2877 self._match_r_paren() 2878 2879 return self.expression( 2880 exp.MatchRecognize, 2881 partition_by=partition, 2882 order=order, 2883 measures=measures, 2884 rows=rows, 2885 after=after, 2886 pattern=pattern, 2887 define=define, 2888 alias=self._parse_table_alias(), 2889 ) 2890 2891 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2892 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2893 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2894 cross_apply = False 2895 2896 if cross_apply is not None: 2897 this = self._parse_select(table=True) 2898 view = None 2899 outer = None 2900 elif self._match(TokenType.LATERAL): 2901 this = self._parse_select(table=True) 2902 view = self._match(TokenType.VIEW) 2903 outer = self._match(TokenType.OUTER) 2904 else: 2905 return None 2906 2907 if not this: 2908 this = ( 2909 self._parse_unnest() 2910 or self._parse_function() 2911 or self._parse_id_var(any_token=False) 2912 ) 2913 2914 while self._match(TokenType.DOT): 2915 this = exp.Dot( 2916 this=this, 2917 expression=self._parse_function() or self._parse_id_var(any_token=False), 2918 ) 2919 2920 if view: 2921 table = self._parse_id_var(any_token=False) 2922 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2923 table_alias: t.Optional[exp.TableAlias] = self.expression( 2924 exp.TableAlias, this=table, columns=columns 2925 ) 2926 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2927 # We move the alias from the lateral's child node to the lateral itself 2928 table_alias = this.args["alias"].pop() 2929 else: 2930 table_alias = self._parse_table_alias() 2931 2932 return self.expression( 2933 exp.Lateral, 2934 this=this, 2935 view=view, 2936 outer=outer, 2937 alias=table_alias, 2938 cross_apply=cross_apply, 2939 ) 2940 2941 def _parse_join_parts( 2942 self, 2943 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2944 return ( 2945 self._match_set(self.JOIN_METHODS) and self._prev, 2946 self._match_set(self.JOIN_SIDES) and self._prev, 2947 self._match_set(self.JOIN_KINDS) and self._prev, 2948 ) 2949 2950 def _parse_join( 2951 self, skip_join_token: bool = False, parse_bracket: bool = False 2952 ) -> t.Optional[exp.Join]: 2953 if self._match(TokenType.COMMA): 2954 return self.expression(exp.Join, this=self._parse_table()) 2955 2956 index = self._index 2957 method, side, kind = self._parse_join_parts() 2958 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2959 join = self._match(TokenType.JOIN) 2960 2961 if not skip_join_token and not join: 2962 self._retreat(index) 2963 kind = None 2964 method = None 2965 side = None 2966 2967 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2968 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2969 2970 if not skip_join_token 
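# --------------------------------------------------------------------------
# Illustrative aside (not part of the module): _parse_join_parts splits a
# join into method/side/kind tokens, which surface as plain strings on the
# resulting exp.Join:
#
#     import sqlglot
#     from sqlglot import exp
#
#     join = sqlglot.parse_one(
#         "SELECT * FROM a LEFT OUTER JOIN b ON a.id = b.id"
#     ).find(exp.Join)
#     # join.args["side"] == "LEFT", join.args["kind"] == "OUTER",
#     # and join.args["on"] holds the condition
# --------------------------------------------------------------------------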
and not join and not outer_apply and not cross_apply: 2971 return None 2972 2973 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2974 2975 if method: 2976 kwargs["method"] = method.text 2977 if side: 2978 kwargs["side"] = side.text 2979 if kind: 2980 kwargs["kind"] = kind.text 2981 if hint: 2982 kwargs["hint"] = hint 2983 2984 if self._match(TokenType.MATCH_CONDITION): 2985 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 2986 2987 if self._match(TokenType.ON): 2988 kwargs["on"] = self._parse_conjunction() 2989 elif self._match(TokenType.USING): 2990 kwargs["using"] = self._parse_wrapped_id_vars() 2991 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 2992 kind and kind.token_type == TokenType.CROSS 2993 ): 2994 index = self._index 2995 joins: t.Optional[list] = list(self._parse_joins()) 2996 2997 if joins and self._match(TokenType.ON): 2998 kwargs["on"] = self._parse_conjunction() 2999 elif joins and self._match(TokenType.USING): 3000 kwargs["using"] = self._parse_wrapped_id_vars() 3001 else: 3002 joins = None 3003 self._retreat(index) 3004 3005 kwargs["this"].set("joins", joins if joins else None) 3006 3007 comments = [c for token in (method, side, kind) if token for c in token.comments] 3008 return self.expression(exp.Join, comments=comments, **kwargs) 3009 3010 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3011 this = self._parse_conjunction() 3012 3013 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3014 return this 3015 3016 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3017 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3018 3019 return this 3020 3021 def _parse_index_params(self) -> exp.IndexParameters: 3022 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3023 3024 if self._match(TokenType.L_PAREN, advance=False): 3025 columns = self._parse_wrapped_csv(self._parse_with_operator) 3026 else: 3027 columns = None 3028 3029 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3030 partition_by = self._parse_partition_by() 3031 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3032 tablespace = ( 3033 self._parse_var(any_token=True) 3034 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3035 else None 3036 ) 3037 where = self._parse_where() 3038 3039 return self.expression( 3040 exp.IndexParameters, 3041 using=using, 3042 columns=columns, 3043 include=include, 3044 partition_by=partition_by, 3045 where=where, 3046 with_storage=with_storage, 3047 tablespace=tablespace, 3048 ) 3049 3050 def _parse_index( 3051 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3052 ) -> t.Optional[exp.Index]: 3053 if index or anonymous: 3054 unique = None 3055 primary = None 3056 amp = None 3057 3058 self._match(TokenType.ON) 3059 self._match(TokenType.TABLE) # hive 3060 table = self._parse_table_parts(schema=True) 3061 else: 3062 unique = self._match(TokenType.UNIQUE) 3063 primary = self._match_text_seq("PRIMARY") 3064 amp = self._match_text_seq("AMP") 3065 3066 if not self._match(TokenType.INDEX): 3067 return None 3068 3069 index = self._parse_id_var() 3070 table = None 3071 3072 params = self._parse_index_params() 3073 3074 return self.expression( 3075 exp.Index, 3076 this=index, 3077 table=table, 3078 unique=unique, 3079 primary=primary, 3080 amp=amp, 3081 params=params, 3082 ) 3083 3084 def _parse_table_hints(self) -> 
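# --------------------------------------------------------------------------
# Illustrative aside (not part of the module): the index options parsed by
# _parse_index_params are grouped under a single exp.IndexParameters node.
# A minimal sketch; arg names follow the definitions above but may shift:
#
#     import sqlglot
#
#     ast = sqlglot.parse_one(
#         "CREATE INDEX idx ON t (a, b) WHERE a > 0", read="postgres"
#     )
#     params = ast.this.args["params"]  # exp.IndexParameters
#     # params.args["columns"] and params.args["where"] carry the details
# --------------------------------------------------------------------------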
t.Optional[t.List[exp.Expression]]: 3085 hints: t.List[exp.Expression] = [] 3086 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3087 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3088 hints.append( 3089 self.expression( 3090 exp.WithTableHint, 3091 expressions=self._parse_csv( 3092 lambda: self._parse_function() or self._parse_var(any_token=True) 3093 ), 3094 ) 3095 ) 3096 self._match_r_paren() 3097 else: 3098 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3099 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3100 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3101 3102 self._match_texts(("INDEX", "KEY")) 3103 if self._match(TokenType.FOR): 3104 hint.set("target", self._advance_any() and self._prev.text.upper()) 3105 3106 hint.set("expressions", self._parse_wrapped_id_vars()) 3107 hints.append(hint) 3108 3109 return hints or None 3110 3111 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3112 return ( 3113 (not schema and self._parse_function(optional_parens=False)) 3114 or self._parse_id_var(any_token=False) 3115 or self._parse_string_as_identifier() 3116 or self._parse_placeholder() 3117 ) 3118 3119 def _parse_table_parts( 3120 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3121 ) -> exp.Table: 3122 catalog = None 3123 db = None 3124 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3125 3126 while self._match(TokenType.DOT): 3127 if catalog: 3128 # This allows nesting the table in arbitrarily many dot expressions if needed 3129 table = self.expression( 3130 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3131 ) 3132 else: 3133 catalog = db 3134 db = table 3135 # "" used for tsql FROM a..b case 3136 table = self._parse_table_part(schema=schema) or "" 3137 3138 if ( 3139 wildcard 3140 and self._is_connected() 3141 and (isinstance(table, exp.Identifier) or not table) 3142 and self._match(TokenType.STAR) 3143 ): 3144 if isinstance(table, exp.Identifier): 3145 table.args["this"] += "*" 3146 else: 3147 table = exp.Identifier(this="*") 3148 3149 # We bubble up comments from the Identifier to the Table 3150 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3151 3152 if is_db_reference: 3153 catalog = db 3154 db = table 3155 table = None 3156 3157 if not table and not is_db_reference: 3158 self.raise_error(f"Expected table name but got {self._curr}") 3159 if not db and is_db_reference: 3160 self.raise_error(f"Expected database name but got {self._curr}") 3161 3162 return self.expression( 3163 exp.Table, 3164 comments=comments, 3165 this=table, 3166 db=db, 3167 catalog=catalog, 3168 pivots=self._parse_pivots(), 3169 ) 3170 3171 def _parse_table( 3172 self, 3173 schema: bool = False, 3174 joins: bool = False, 3175 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3176 parse_bracket: bool = False, 3177 is_db_reference: bool = False, 3178 parse_partition: bool = False, 3179 ) -> t.Optional[exp.Expression]: 3180 lateral = self._parse_lateral() 3181 if lateral: 3182 return lateral 3183 3184 unnest = self._parse_unnest() 3185 if unnest: 3186 return unnest 3187 3188 values = self._parse_derived_table_values() 3189 if values: 3190 return values 3191 3192 subquery = self._parse_select(table=True) 3193 if subquery: 3194 if not subquery.args.get("pivots"): 3195 subquery.set("pivots", self._parse_pivots()) 3196 return subquery 3197 3198 bracket = parse_bracket and 
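# --------------------------------------------------------------------------
# Illustrative aside (not part of the module): _parse_table_parts assembles
# dotted names into catalog/db/table slots on exp.Table:
#
#     import sqlglot
#     from sqlglot import exp
#
#     tbl = sqlglot.parse_one("SELECT * FROM c.d.t").find(exp.Table)
#     # tbl.catalog == "c", tbl.db == "d", tbl.name == "t"
# --------------------------------------------------------------------------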
self._parse_bracket(None) 3199 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3200 3201 only = self._match(TokenType.ONLY) 3202 3203 this = t.cast( 3204 exp.Expression, 3205 bracket 3206 or self._parse_bracket( 3207 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3208 ), 3209 ) 3210 3211 if only: 3212 this.set("only", only) 3213 3214 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3215 self._match_text_seq("*") 3216 3217 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3218 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3219 this.set("partition", self._parse_partition()) 3220 3221 if schema: 3222 return self._parse_schema(this=this) 3223 3224 version = self._parse_version() 3225 3226 if version: 3227 this.set("version", version) 3228 3229 if self.dialect.ALIAS_POST_TABLESAMPLE: 3230 table_sample = self._parse_table_sample() 3231 3232 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3233 if alias: 3234 this.set("alias", alias) 3235 3236 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3237 return self.expression( 3238 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3239 ) 3240 3241 this.set("hints", self._parse_table_hints()) 3242 3243 if not this.args.get("pivots"): 3244 this.set("pivots", self._parse_pivots()) 3245 3246 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3247 table_sample = self._parse_table_sample() 3248 3249 if table_sample: 3250 table_sample.set("this", this) 3251 this = table_sample 3252 3253 if joins: 3254 for join in self._parse_joins(): 3255 this.append("joins", join) 3256 3257 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3258 this.set("ordinality", True) 3259 this.set("alias", self._parse_table_alias()) 3260 3261 return this 3262 3263 def _parse_version(self) -> t.Optional[exp.Version]: 3264 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3265 this = "TIMESTAMP" 3266 elif self._match(TokenType.VERSION_SNAPSHOT): 3267 this = "VERSION" 3268 else: 3269 return None 3270 3271 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3272 kind = self._prev.text.upper() 3273 start = self._parse_bitwise() 3274 self._match_texts(("TO", "AND")) 3275 end = self._parse_bitwise() 3276 expression: t.Optional[exp.Expression] = self.expression( 3277 exp.Tuple, expressions=[start, end] 3278 ) 3279 elif self._match_text_seq("CONTAINED", "IN"): 3280 kind = "CONTAINED IN" 3281 expression = self.expression( 3282 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3283 ) 3284 elif self._match(TokenType.ALL): 3285 kind = "ALL" 3286 expression = None 3287 else: 3288 self._match_text_seq("AS", "OF") 3289 kind = "AS OF" 3290 expression = self._parse_type() 3291 3292 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3293 3294 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3295 if not self._match(TokenType.UNNEST): 3296 return None 3297 3298 expressions = self._parse_wrapped_csv(self._parse_equality) 3299 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3300 3301 alias = self._parse_table_alias() if with_alias else None 3302 3303 if alias: 3304 if self.dialect.UNNEST_COLUMN_ONLY: 3305 if alias.args.get("columns"): 3306 self.raise_error("Unexpected extra column alias in unnest.") 3307 3308 alias.set("columns", [alias.this]) 3309 alias.set("this", None) 3310 3311 columns = 
alias.args.get("columns") or [] 3312 if offset and len(expressions) < len(columns): 3313 offset = columns.pop() 3314 3315 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3316 self._match(TokenType.ALIAS) 3317 offset = self._parse_id_var( 3318 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3319 ) or exp.to_identifier("offset") 3320 3321 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3322 3323 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3324 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3325 if not is_derived and not self._match_text_seq("VALUES"): 3326 return None 3327 3328 expressions = self._parse_csv(self._parse_value) 3329 alias = self._parse_table_alias() 3330 3331 if is_derived: 3332 self._match_r_paren() 3333 3334 return self.expression( 3335 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3336 ) 3337 3338 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3339 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3340 as_modifier and self._match_text_seq("USING", "SAMPLE") 3341 ): 3342 return None 3343 3344 bucket_numerator = None 3345 bucket_denominator = None 3346 bucket_field = None 3347 percent = None 3348 size = None 3349 seed = None 3350 3351 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3352 matched_l_paren = self._match(TokenType.L_PAREN) 3353 3354 if self.TABLESAMPLE_CSV: 3355 num = None 3356 expressions = self._parse_csv(self._parse_primary) 3357 else: 3358 expressions = None 3359 num = ( 3360 self._parse_factor() 3361 if self._match(TokenType.NUMBER, advance=False) 3362 else self._parse_primary() or self._parse_placeholder() 3363 ) 3364 3365 if self._match_text_seq("BUCKET"): 3366 bucket_numerator = self._parse_number() 3367 self._match_text_seq("OUT", "OF") 3368 bucket_denominator = self._parse_number() 3369 self._match(TokenType.ON) 3370 bucket_field = self._parse_field() 3371 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3372 percent = num 3373 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3374 size = num 3375 else: 3376 percent = num 3377 3378 if matched_l_paren: 3379 self._match_r_paren() 3380 3381 if self._match(TokenType.L_PAREN): 3382 method = self._parse_var(upper=True) 3383 seed = self._match(TokenType.COMMA) and self._parse_number() 3384 self._match_r_paren() 3385 elif self._match_texts(("SEED", "REPEATABLE")): 3386 seed = self._parse_wrapped(self._parse_number) 3387 3388 return self.expression( 3389 exp.TableSample, 3390 expressions=expressions, 3391 method=method, 3392 bucket_numerator=bucket_numerator, 3393 bucket_denominator=bucket_denominator, 3394 bucket_field=bucket_field, 3395 percent=percent, 3396 size=size, 3397 seed=seed, 3398 ) 3399 3400 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3401 return list(iter(self._parse_pivot, None)) or None 3402 3403 def _parse_joins(self) -> t.Iterator[exp.Join]: 3404 return iter(self._parse_join, None) 3405 3406 # https://duckdb.org/docs/sql/statements/pivot 3407 def _parse_simplified_pivot(self) -> exp.Pivot: 3408 def _parse_on() -> t.Optional[exp.Expression]: 3409 this = self._parse_bitwise() 3410 return self._parse_in(this) if self._match(TokenType.IN) else this 3411 3412 this = self._parse_table() 3413 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3414 using = self._match(TokenType.USING) and self._parse_csv(
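# --------------------------------------------------------------------------
# Illustrative aside (not part of the module): a parenthesized VALUES list
# in a FROM clause goes through _parse_derived_table_values, with each row
# built as an exp.Tuple by _parse_value:
#
#     import sqlglot
#     from sqlglot import exp
#
#     values = sqlglot.parse_one(
#         "SELECT * FROM (VALUES (1), (2)) AS t(a)"
#     ).find(exp.Values)
#     # values.alias == "t"; values.expressions is a list of exp.Tuple rows
# --------------------------------------------------------------------------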
lambda: self._parse_alias(self._parse_function()) 3416 ) 3417 group = self._parse_group() 3418 return self.expression( 3419 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3420 ) 3421 3422 def _parse_pivot_in(self) -> exp.In: 3423 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3424 this = self._parse_conjunction() 3425 3426 self._match(TokenType.ALIAS) 3427 alias = self._parse_field() 3428 if alias: 3429 return self.expression(exp.PivotAlias, this=this, alias=alias) 3430 3431 return this 3432 3433 value = self._parse_column() 3434 3435 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3436 self.raise_error("Expecting IN (") 3437 3438 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3439 3440 self._match_r_paren() 3441 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3442 3443 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3444 index = self._index 3445 include_nulls = None 3446 3447 if self._match(TokenType.PIVOT): 3448 unpivot = False 3449 elif self._match(TokenType.UNPIVOT): 3450 unpivot = True 3451 3452 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3453 if self._match_text_seq("INCLUDE", "NULLS"): 3454 include_nulls = True 3455 elif self._match_text_seq("EXCLUDE", "NULLS"): 3456 include_nulls = False 3457 else: 3458 return None 3459 3460 expressions = [] 3461 3462 if not self._match(TokenType.L_PAREN): 3463 self._retreat(index) 3464 return None 3465 3466 if unpivot: 3467 expressions = self._parse_csv(self._parse_column) 3468 else: 3469 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3470 3471 if not expressions: 3472 self.raise_error("Failed to parse PIVOT's aggregation list") 3473 3474 if not self._match(TokenType.FOR): 3475 self.raise_error("Expecting FOR") 3476 3477 field = self._parse_pivot_in() 3478 3479 self._match_r_paren() 3480 3481 pivot = self.expression( 3482 exp.Pivot, 3483 expressions=expressions, 3484 field=field, 3485 unpivot=unpivot, 3486 include_nulls=include_nulls, 3487 ) 3488 3489 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3490 pivot.set("alias", self._parse_table_alias()) 3491 3492 if not unpivot: 3493 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3494 3495 columns: t.List[exp.Expression] = [] 3496 for fld in pivot.args["field"].expressions: 3497 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3498 for name in names: 3499 if self.PREFIXED_PIVOT_COLUMNS: 3500 name = f"{name}_{field_name}" if name else field_name 3501 else: 3502 name = f"{field_name}_{name}" if name else field_name 3503 3504 columns.append(exp.to_identifier(name)) 3505 3506 pivot.set("columns", columns) 3507 3508 return pivot 3509 3510 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3511 return [agg.alias for agg in aggregations] 3512 3513 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3514 if not skip_where_token and not self._match(TokenType.PREWHERE): 3515 return None 3516 3517 return self.expression( 3518 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3519 ) 3520 3521 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3522 if not skip_where_token and not self._match(TokenType.WHERE): 3523 return None 3524 3525 return self.expression( 3526 exp.Where, comments=self._prev_comments, 
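# --------------------------------------------------------------------------
# Illustrative aside (not part of the module): _parse_pivot collects the
# aggregations and the FOR ... IN (...) field into one exp.Pivot node.
# A sketch using Snowflake syntax; details may vary by version:
#
#     import sqlglot
#     from sqlglot import exp
#
#     pivot = sqlglot.parse_one(
#         "SELECT * FROM t PIVOT(SUM(v) FOR k IN ('a', 'b'))", read="snowflake"
#     ).find(exp.Pivot)
#     # pivot.args["expressions"] -> aggregations, pivot.args["field"] -> exp.In
# --------------------------------------------------------------------------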
this=self._parse_conjunction() 3527 ) 3528 3529 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3530 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3531 return None 3532 3533 elements: t.Dict[str, t.Any] = defaultdict(list) 3534 3535 if self._match(TokenType.ALL): 3536 elements["all"] = True 3537 elif self._match(TokenType.DISTINCT): 3538 elements["all"] = False 3539 3540 while True: 3541 expressions = self._parse_csv(self._parse_conjunction) 3542 if expressions: 3543 elements["expressions"].extend(expressions) 3544 3545 grouping_sets = self._parse_grouping_sets() 3546 if grouping_sets: 3547 elements["grouping_sets"].extend(grouping_sets) 3548 3549 rollup = None 3550 cube = None 3551 totals = None 3552 3553 index = self._index 3554 with_ = self._match(TokenType.WITH) 3555 if self._match(TokenType.ROLLUP): 3556 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3557 elements["rollup"].extend(ensure_list(rollup)) 3558 3559 if self._match(TokenType.CUBE): 3560 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3561 elements["cube"].extend(ensure_list(cube)) 3562 3563 if self._match_text_seq("TOTALS"): 3564 totals = True 3565 elements["totals"] = True # type: ignore 3566 3567 if not (grouping_sets or rollup or cube or totals): 3568 if with_: 3569 self._retreat(index) 3570 break 3571 3572 return self.expression(exp.Group, **elements) # type: ignore 3573 3574 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3575 if not self._match(TokenType.GROUPING_SETS): 3576 return None 3577 3578 return self._parse_wrapped_csv(self._parse_grouping_set) 3579 3580 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3581 if self._match(TokenType.L_PAREN): 3582 grouping_set = self._parse_csv(self._parse_column) 3583 self._match_r_paren() 3584 return self.expression(exp.Tuple, expressions=grouping_set) 3585 3586 return self._parse_column() 3587 3588 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3589 if not skip_having_token and not self._match(TokenType.HAVING): 3590 return None 3591 return self.expression(exp.Having, this=self._parse_conjunction()) 3592 3593 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3594 if not self._match(TokenType.QUALIFY): 3595 return None 3596 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3597 3598 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3599 if skip_start_token: 3600 start = None 3601 elif self._match(TokenType.START_WITH): 3602 start = self._parse_conjunction() 3603 else: 3604 return None 3605 3606 self._match(TokenType.CONNECT_BY) 3607 nocycle = self._match_text_seq("NOCYCLE") 3608 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3609 exp.Prior, this=self._parse_bitwise() 3610 ) 3611 connect = self._parse_conjunction() 3612 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3613 3614 if not start and self._match(TokenType.START_WITH): 3615 start = self._parse_conjunction() 3616 3617 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3618 3619 def _parse_name_as_expression(self) -> exp.Alias: 3620 return self.expression( 3621 exp.Alias, 3622 alias=self._parse_id_var(any_token=True), 3623 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3624 ) 3625 3626 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3627 if self._match_text_seq("INTERPOLATE"): 3628 return 
self._parse_wrapped_csv(self._parse_name_as_expression) 3629 return None 3630 3631 def _parse_order( 3632 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3633 ) -> t.Optional[exp.Expression]: 3634 siblings = None 3635 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3636 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3637 return this 3638 3639 siblings = True 3640 3641 return self.expression( 3642 exp.Order, 3643 this=this, 3644 expressions=self._parse_csv(self._parse_ordered), 3645 interpolate=self._parse_interpolate(), 3646 siblings=siblings, 3647 ) 3648 3649 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3650 if not self._match(token): 3651 return None 3652 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3653 3654 def _parse_ordered( 3655 self, parse_method: t.Optional[t.Callable] = None 3656 ) -> t.Optional[exp.Ordered]: 3657 this = parse_method() if parse_method else self._parse_conjunction() 3658 if not this: 3659 return None 3660 3661 asc = self._match(TokenType.ASC) 3662 desc = self._match(TokenType.DESC) or (asc and False) 3663 3664 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3665 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3666 3667 nulls_first = is_nulls_first or False 3668 explicitly_null_ordered = is_nulls_first or is_nulls_last 3669 3670 if ( 3671 not explicitly_null_ordered 3672 and ( 3673 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3674 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3675 ) 3676 and self.dialect.NULL_ORDERING != "nulls_are_last" 3677 ): 3678 nulls_first = True 3679 3680 if self._match_text_seq("WITH", "FILL"): 3681 with_fill = self.expression( 3682 exp.WithFill, 3683 **{ # type: ignore 3684 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3685 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3686 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3687 }, 3688 ) 3689 else: 3690 with_fill = None 3691 3692 return self.expression( 3693 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3694 ) 3695 3696 def _parse_limit( 3697 self, 3698 this: t.Optional[exp.Expression] = None, 3699 top: bool = False, 3700 skip_limit_token: bool = False, 3701 ) -> t.Optional[exp.Expression]: 3702 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3703 comments = self._prev_comments 3704 if top: 3705 limit_paren = self._match(TokenType.L_PAREN) 3706 expression = self._parse_term() if limit_paren else self._parse_number() 3707 3708 if limit_paren: 3709 self._match_r_paren() 3710 else: 3711 expression = self._parse_term() 3712 3713 if self._match(TokenType.COMMA): 3714 offset = expression 3715 expression = self._parse_term() 3716 else: 3717 offset = None 3718 3719 limit_exp = self.expression( 3720 exp.Limit, 3721 this=this, 3722 expression=expression, 3723 offset=offset, 3724 comments=comments, 3725 expressions=self._parse_limit_by(), 3726 ) 3727 3728 return limit_exp 3729 3730 if self._match(TokenType.FETCH): 3731 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3732 direction = self._prev.text.upper() if direction else "FIRST" 3733 3734 count = self._parse_field(tokens=self.FETCH_TOKENS) 3735 percent = self._match(TokenType.PERCENT) 3736 3737 self._match_set((TokenType.ROW, TokenType.ROWS)) 3738 3739 only = self._match_text_seq("ONLY") 3740 with_ties = self._match_text_seq("WITH", "TIES") 3741 3742 if 
only and with_ties: 3743 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3744 3745 return self.expression( 3746 exp.Fetch, 3747 direction=direction, 3748 count=count, 3749 percent=percent, 3750 with_ties=with_ties, 3751 ) 3752 3753 return this 3754 3755 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3756 if not self._match(TokenType.OFFSET): 3757 return this 3758 3759 count = self._parse_term() 3760 self._match_set((TokenType.ROW, TokenType.ROWS)) 3761 3762 return self.expression( 3763 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3764 ) 3765 3766 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3767 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3768 3769 def _parse_locks(self) -> t.List[exp.Lock]: 3770 locks = [] 3771 while True: 3772 if self._match_text_seq("FOR", "UPDATE"): 3773 update = True 3774 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3775 "LOCK", "IN", "SHARE", "MODE" 3776 ): 3777 update = False 3778 else: 3779 break 3780 3781 expressions = None 3782 if self._match_text_seq("OF"): 3783 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3784 3785 wait: t.Optional[bool | exp.Expression] = None 3786 if self._match_text_seq("NOWAIT"): 3787 wait = True 3788 elif self._match_text_seq("WAIT"): 3789 wait = self._parse_primary() 3790 elif self._match_text_seq("SKIP", "LOCKED"): 3791 wait = False 3792 3793 locks.append( 3794 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3795 ) 3796 3797 return locks 3798 3799 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3800 while this and self._match_set(self.SET_OPERATIONS): 3801 token_type = self._prev.token_type 3802 3803 if token_type == TokenType.UNION: 3804 operation = exp.Union 3805 elif token_type == TokenType.EXCEPT: 3806 operation = exp.Except 3807 else: 3808 operation = exp.Intersect 3809 3810 comments = self._prev.comments 3811 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3812 by_name = self._match_text_seq("BY", "NAME") 3813 expression = self._parse_select(nested=True, parse_set_operation=False) 3814 3815 this = self.expression( 3816 operation, 3817 comments=comments, 3818 this=this, 3819 distinct=distinct, 3820 by_name=by_name, 3821 expression=expression, 3822 ) 3823 3824 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3825 expression = this.expression 3826 3827 if expression: 3828 for arg in self.UNION_MODIFIERS: 3829 expr = expression.args.get(arg) 3830 if expr: 3831 this.set(arg, expr.pop()) 3832 3833 return this 3834 3835 def _parse_expression(self) -> t.Optional[exp.Expression]: 3836 return self._parse_alias(self._parse_conjunction()) 3837 3838 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3839 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3840 3841 def _parse_equality(self) -> t.Optional[exp.Expression]: 3842 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3843 3844 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3845 return self._parse_tokens(self._parse_range, self.COMPARISON) 3846 3847 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3848 this = this or self._parse_bitwise() 3849 negate = self._match(TokenType.NOT) 3850 3851 if self._match_set(self.RANGE_PARSERS): 3852 expression = 
self.RANGE_PARSERS[self._prev.token_type](self, this) 3853 if not expression: 3854 return this 3855 3856 this = expression 3857 elif self._match(TokenType.ISNULL): 3858 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3859 3860 # Postgres supports ISNULL and NOTNULL for conditions. 3861 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3862 if self._match(TokenType.NOTNULL): 3863 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3864 this = self.expression(exp.Not, this=this) 3865 3866 if negate: 3867 this = self.expression(exp.Not, this=this) 3868 3869 if self._match(TokenType.IS): 3870 this = self._parse_is(this) 3871 3872 return this 3873 3874 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3875 index = self._index - 1 3876 negate = self._match(TokenType.NOT) 3877 3878 if self._match_text_seq("DISTINCT", "FROM"): 3879 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3880 return self.expression(klass, this=this, expression=self._parse_bitwise()) 3881 3882 expression = self._parse_null() or self._parse_boolean() 3883 if not expression: 3884 self._retreat(index) 3885 return None 3886 3887 this = self.expression(exp.Is, this=this, expression=expression) 3888 return self.expression(exp.Not, this=this) if negate else this 3889 3890 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3891 unnest = self._parse_unnest(with_alias=False) 3892 if unnest: 3893 this = self.expression(exp.In, this=this, unnest=unnest) 3894 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3895 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3896 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3897 3898 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 3899 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 3900 else: 3901 this = self.expression(exp.In, this=this, expressions=expressions) 3902 3903 if matched_l_paren: 3904 self._match_r_paren(this) 3905 elif not self._match(TokenType.R_BRACKET, expression=this): 3906 self.raise_error("Expecting ]") 3907 else: 3908 this = self.expression(exp.In, this=this, field=self._parse_field()) 3909 3910 return this 3911 3912 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3913 low = self._parse_bitwise() 3914 self._match(TokenType.AND) 3915 high = self._parse_bitwise() 3916 return self.expression(exp.Between, this=this, low=low, high=high) 3917 3918 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3919 if not self._match(TokenType.ESCAPE): 3920 return this 3921 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3922 3923 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3924 index = self._index 3925 3926 if not self._match(TokenType.INTERVAL) and match_interval: 3927 return None 3928 3929 if self._match(TokenType.STRING, advance=False): 3930 this = self._parse_primary() 3931 else: 3932 this = self._parse_term() 3933 3934 if not this or ( 3935 isinstance(this, exp.Column) 3936 and not this.table 3937 and not this.this.quoted 3938 and this.name.upper() == "IS" 3939 ): 3940 self._retreat(index) 3941 return None 3942 3943 unit = self._parse_function() or ( 3944 not self._match(TokenType.ALIAS, advance=False) 3945 and self._parse_var(any_token=True, upper=True) 3946 ) 3947 3948 # Most dialects support, e.g., the form INTERVAL 
'5' day, thus we try to parse 3949 # each INTERVAL expression into this canonical form so it's easy to transpile 3950 if this and this.is_number: 3951 this = exp.Literal.string(this.name) 3952 elif this and this.is_string: 3953 parts = this.name.split() 3954 3955 if len(parts) == 2: 3956 if unit: 3957 # This is not actually a unit, it's something else (e.g. a "window side") 3958 unit = None 3959 self._retreat(self._index - 1) 3960 3961 this = exp.Literal.string(parts[0]) 3962 unit = self.expression(exp.Var, this=parts[1].upper()) 3963 3964 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 3965 unit = self.expression( 3966 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 3967 ) 3968 3969 return self.expression(exp.Interval, this=this, unit=unit) 3970 3971 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3972 this = self._parse_term() 3973 3974 while True: 3975 if self._match_set(self.BITWISE): 3976 this = self.expression( 3977 self.BITWISE[self._prev.token_type], 3978 this=this, 3979 expression=self._parse_term(), 3980 ) 3981 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3982 this = self.expression( 3983 exp.DPipe, 3984 this=this, 3985 expression=self._parse_term(), 3986 safe=not self.dialect.STRICT_STRING_CONCAT, 3987 ) 3988 elif self._match(TokenType.DQMARK): 3989 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3990 elif self._match_pair(TokenType.LT, TokenType.LT): 3991 this = self.expression( 3992 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3993 ) 3994 elif self._match_pair(TokenType.GT, TokenType.GT): 3995 this = self.expression( 3996 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3997 ) 3998 else: 3999 break 4000 4001 return this 4002 4003 def _parse_term(self) -> t.Optional[exp.Expression]: 4004 return self._parse_tokens(self._parse_factor, self.TERM) 4005 4006 def _parse_factor(self) -> t.Optional[exp.Expression]: 4007 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4008 this = parse_method() 4009 4010 while self._match_set(self.FACTOR): 4011 this = self.expression( 4012 self.FACTOR[self._prev.token_type], 4013 this=this, 4014 comments=self._prev_comments, 4015 expression=parse_method(), 4016 ) 4017 if isinstance(this, exp.Div): 4018 this.args["typed"] = self.dialect.TYPED_DIVISION 4019 this.args["safe"] = self.dialect.SAFE_DIVISION 4020 4021 return this 4022 4023 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4024 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4025 4026 def _parse_unary(self) -> t.Optional[exp.Expression]: 4027 if self._match_set(self.UNARY_PARSERS): 4028 return self.UNARY_PARSERS[self._prev.token_type](self) 4029 return self._parse_at_time_zone(self._parse_type()) 4030 4031 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 4032 interval = parse_interval and self._parse_interval() 4033 if interval: 4034 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 4035 while True: 4036 index = self._index 4037 self._match(TokenType.PLUS) 4038 4039 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4040 self._retreat(index) 4041 break 4042 4043 interval = self.expression( # type: ignore 4044 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4045 ) 4046 4047 return interval 4048 4049 index = self._index 4050 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4051 this = self._parse_column() 4052 4053 if data_type: 4054 if isinstance(this, exp.Literal): 4055 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4056 if parser: 4057 return parser(self, this, data_type) 4058 return self.expression(exp.Cast, this=this, to=data_type) 4059 if not data_type.expressions: 4060 self._retreat(index) 4061 return self._parse_column() 4062 return self._parse_column_ops(data_type) 4063 4064 return this and self._parse_column_ops(this) 4065 4066 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4067 this = self._parse_type() 4068 if not this: 4069 return None 4070 4071 if isinstance(this, exp.Column) and not this.table: 4072 this = exp.var(this.name.upper()) 4073 4074 return self.expression( 4075 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4076 ) 4077 4078 def _parse_types( 4079 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4080 ) -> t.Optional[exp.Expression]: 4081 index = self._index 4082 4083 prefix = self._match_text_seq("SYSUDTLIB", ".") 4084 4085 if not self._match_set(self.TYPE_TOKENS): 4086 identifier = allow_identifiers and self._parse_id_var( 4087 any_token=False, tokens=(TokenType.VAR,) 4088 ) 4089 if identifier: 4090 tokens = self.dialect.tokenize(identifier.name) 4091 4092 if len(tokens) != 1: 4093 self.raise_error("Unexpected identifier", self._prev) 4094 4095 if tokens[0].token_type in self.TYPE_TOKENS: 4096 self._prev = tokens[0] 4097 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4098 type_name = identifier.name 4099 4100 while self._match(TokenType.DOT): 4101 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4102 4103 return exp.DataType.build(type_name, udt=True) 4104 else: 4105 self._retreat(self._index - 1) 4106 return None 4107 else: 4108 return None 4109 4110 type_token = self._prev.token_type 4111 4112 if type_token == TokenType.PSEUDO_TYPE: 4113 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4114 4115 if type_token == TokenType.OBJECT_IDENTIFIER: 4116 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4117 4118 nested = type_token in self.NESTED_TYPE_TOKENS 4119 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4120 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4121 expressions = None 4122 maybe_func = False 4123 4124 if self._match(TokenType.L_PAREN): 4125 if is_struct: 4126 expressions = self._parse_csv(self._parse_struct_types) 4127 elif nested: 4128 expressions = self._parse_csv( 4129 lambda: self._parse_types( 4130 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4131 ) 4132 ) 4133 elif type_token in self.ENUM_TYPE_TOKENS: 4134 expressions = self._parse_csv(self._parse_equality) 4135 elif is_aggregate: 4136 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4137 any_token=False, tokens=(TokenType.VAR,) 4138 ) 4139 if not func_or_ident or not self._match(TokenType.COMMA): 4140 return None 4141 expressions = 
self._parse_csv( 4142 lambda: self._parse_types( 4143 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4144 ) 4145 ) 4146 expressions.insert(0, func_or_ident) 4147 else: 4148 expressions = self._parse_csv(self._parse_type_size) 4149 4150 if not expressions or not self._match(TokenType.R_PAREN): 4151 self._retreat(index) 4152 return None 4153 4154 maybe_func = True 4155 4156 this: t.Optional[exp.Expression] = None 4157 values: t.Optional[t.List[exp.Expression]] = None 4158 4159 if nested and self._match(TokenType.LT): 4160 if is_struct: 4161 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4162 else: 4163 expressions = self._parse_csv( 4164 lambda: self._parse_types( 4165 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4166 ) 4167 ) 4168 4169 if not self._match(TokenType.GT): 4170 self.raise_error("Expecting >") 4171 4172 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4173 values = self._parse_csv(self._parse_conjunction) 4174 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4175 4176 if type_token in self.TIMESTAMPS: 4177 if self._match_text_seq("WITH", "TIME", "ZONE"): 4178 maybe_func = False 4179 tz_type = ( 4180 exp.DataType.Type.TIMETZ 4181 if type_token in self.TIMES 4182 else exp.DataType.Type.TIMESTAMPTZ 4183 ) 4184 this = exp.DataType(this=tz_type, expressions=expressions) 4185 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4186 maybe_func = False 4187 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4188 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4189 maybe_func = False 4190 elif type_token == TokenType.INTERVAL: 4191 unit = self._parse_var(upper=True) 4192 if unit: 4193 if self._match_text_seq("TO"): 4194 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4195 4196 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4197 else: 4198 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4199 4200 if maybe_func and check_func: 4201 index2 = self._index 4202 peek = self._parse_string() 4203 4204 if not peek: 4205 self._retreat(index) 4206 return None 4207 4208 self._retreat(index2) 4209 4210 if not this: 4211 if self._match_text_seq("UNSIGNED"): 4212 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4213 if not unsigned_type_token: 4214 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4215 4216 type_token = unsigned_type_token or type_token 4217 4218 this = exp.DataType( 4219 this=exp.DataType.Type[type_token.value], 4220 expressions=expressions, 4221 nested=nested, 4222 values=values, 4223 prefix=prefix, 4224 ) 4225 4226 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 4227 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 4228 4229 return this 4230 4231 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4232 index = self._index 4233 this = self._parse_type(parse_interval=False) or self._parse_id_var() 4234 self._match(TokenType.COLON) 4235 column_def = self._parse_column_def(this) 4236 4237 if type_required and ( 4238 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 4239 ): 4240 self._retreat(index) 4241 return self._parse_types() 4242 4243 return column_def 4244 4245 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4246 if not 
self._match_text_seq("AT", "TIME", "ZONE"): 4247 return this 4248 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4249 4250 def _parse_column(self) -> t.Optional[exp.Expression]: 4251 this = self._parse_column_reference() 4252 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4253 4254 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4255 this = self._parse_field() 4256 if ( 4257 not this 4258 and self._match(TokenType.VALUES, advance=False) 4259 and self.VALUES_FOLLOWED_BY_PAREN 4260 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4261 ): 4262 this = self._parse_id_var() 4263 4264 if isinstance(this, exp.Identifier): 4265 # We bubble up comments from the Identifier to the Column 4266 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4267 4268 return this 4269 4270 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4271 this = self._parse_bracket(this) 4272 4273 while self._match_set(self.COLUMN_OPERATORS): 4274 op_token = self._prev.token_type 4275 op = self.COLUMN_OPERATORS.get(op_token) 4276 4277 if op_token == TokenType.DCOLON: 4278 field = self._parse_types() 4279 if not field: 4280 self.raise_error("Expected type") 4281 elif op and self._curr: 4282 field = self._parse_column_reference() 4283 else: 4284 field = self._parse_field(any_token=True, anonymous_func=True) 4285 4286 if isinstance(field, exp.Func) and this: 4287 # bigquery allows function calls like x.y.count(...) 4288 # SAFE.SUBSTR(...) 4289 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4290 this = exp.replace_tree( 4291 this, 4292 lambda n: ( 4293 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4294 if n.table 4295 else n.this 4296 ) 4297 if isinstance(n, exp.Column) 4298 else n, 4299 ) 4300 4301 if op: 4302 this = op(self, this, field) 4303 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4304 this = self.expression( 4305 exp.Column, 4306 this=field, 4307 table=this.this, 4308 db=this.args.get("table"), 4309 catalog=this.args.get("db"), 4310 ) 4311 else: 4312 this = self.expression(exp.Dot, this=this, expression=field) 4313 this = self._parse_bracket(this) 4314 return this 4315 4316 def _parse_primary(self) -> t.Optional[exp.Expression]: 4317 if self._match_set(self.PRIMARY_PARSERS): 4318 token_type = self._prev.token_type 4319 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4320 4321 if token_type == TokenType.STRING: 4322 expressions = [primary] 4323 while self._match(TokenType.STRING): 4324 expressions.append(exp.Literal.string(self._prev.text)) 4325 4326 if len(expressions) > 1: 4327 return self.expression(exp.Concat, expressions=expressions) 4328 4329 return primary 4330 4331 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4332 return exp.Literal.number(f"0.{self._prev.text}") 4333 4334 if self._match(TokenType.L_PAREN): 4335 comments = self._prev_comments 4336 query = self._parse_select() 4337 4338 if query: 4339 expressions = [query] 4340 else: 4341 expressions = self._parse_expressions() 4342 4343 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4344 4345 if not this and self._match(TokenType.R_PAREN, advance=False): 4346 this = self.expression(exp.Tuple) 4347 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4348 this = self._parse_set_operations( 4349 self._parse_subquery(this=this, parse_alias=False) 4350 ) 4351 elif isinstance(this, 
exp.Subquery): 4352 this = self._parse_subquery( 4353 this=self._parse_set_operations(this), parse_alias=False 4354 ) 4355 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4356 this = self.expression(exp.Tuple, expressions=expressions) 4357 else: 4358 this = self.expression(exp.Paren, this=this) 4359 4360 if this: 4361 this.add_comments(comments) 4362 4363 self._match_r_paren(expression=this) 4364 return this 4365 4366 return None 4367 4368 def _parse_field( 4369 self, 4370 any_token: bool = False, 4371 tokens: t.Optional[t.Collection[TokenType]] = None, 4372 anonymous_func: bool = False, 4373 ) -> t.Optional[exp.Expression]: 4374 if anonymous_func: 4375 field = ( 4376 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4377 or self._parse_primary() 4378 ) 4379 else: 4380 field = self._parse_primary() or self._parse_function( 4381 anonymous=anonymous_func, any_token=any_token 4382 ) 4383 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4384 4385 def _parse_function( 4386 self, 4387 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4388 anonymous: bool = False, 4389 optional_parens: bool = True, 4390 any_token: bool = False, 4391 ) -> t.Optional[exp.Expression]: 4392 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4393 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4394 fn_syntax = False 4395 if ( 4396 self._match(TokenType.L_BRACE, advance=False) 4397 and self._next 4398 and self._next.text.upper() == "FN" 4399 ): 4400 self._advance(2) 4401 fn_syntax = True 4402 4403 func = self._parse_function_call( 4404 functions=functions, 4405 anonymous=anonymous, 4406 optional_parens=optional_parens, 4407 any_token=any_token, 4408 ) 4409 4410 if fn_syntax: 4411 self._match(TokenType.R_BRACE) 4412 4413 return func 4414 4415 def _parse_function_call( 4416 self, 4417 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4418 anonymous: bool = False, 4419 optional_parens: bool = True, 4420 any_token: bool = False, 4421 ) -> t.Optional[exp.Expression]: 4422 if not self._curr: 4423 return None 4424 4425 comments = self._curr.comments 4426 token_type = self._curr.token_type 4427 this = self._curr.text 4428 upper = this.upper() 4429 4430 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4431 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4432 self._advance() 4433 return self._parse_window(parser(self)) 4434 4435 if not self._next or self._next.token_type != TokenType.L_PAREN: 4436 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4437 self._advance() 4438 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4439 4440 return None 4441 4442 if any_token: 4443 if token_type in self.RESERVED_TOKENS: 4444 return None 4445 elif token_type not in self.FUNC_TOKENS: 4446 return None 4447 4448 self._advance(2) 4449 4450 parser = self.FUNCTION_PARSERS.get(upper) 4451 if parser and not anonymous: 4452 this = parser(self) 4453 else: 4454 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4455 4456 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4457 this = self.expression(subquery_predicate, this=self._parse_select()) 4458 self._match_r_paren() 4459 return this 4460 4461 if functions is None: 4462 functions = self.FUNCTIONS 4463 4464 function = functions.get(upper) 4465 4466 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4467 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 
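# Annotation (not part of the upstream source): at this point `args` holds the
# parsed argument expressions. For functions in FUNCTIONS_WITH_ALIASED_ARGS,
# aliased key-value arguments, e.g. a BigQuery-style STRUCT(1 AS a), are
# normalized below by _kv_to_prop_eq into exp.PropertyEQ nodes, roughly
# PropertyEQ(this=Identifier(a), expression=Literal(1)), rather than being
# kept as exp.Alias wrappers.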
4468 4469 if alias: 4470 args = self._kv_to_prop_eq(args) 4471 4472 if function and not anonymous: 4473 if "dialect" in function.__code__.co_varnames: 4474 func = function(args, dialect=self.dialect) 4475 else: 4476 func = function(args) 4477 4478 func = self.validate_expression(func, args) 4479 if not self.dialect.NORMALIZE_FUNCTIONS: 4480 func.meta["name"] = this 4481 4482 this = func 4483 else: 4484 if token_type == TokenType.IDENTIFIER: 4485 this = exp.Identifier(this=this, quoted=True) 4486 this = self.expression(exp.Anonymous, this=this, expressions=args) 4487 4488 if isinstance(this, exp.Expression): 4489 this.add_comments(comments) 4490 4491 self._match_r_paren(this) 4492 return self._parse_window(this) 4493 4494 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4495 transformed = [] 4496 4497 for e in expressions: 4498 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4499 if isinstance(e, exp.Alias): 4500 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4501 4502 if not isinstance(e, exp.PropertyEQ): 4503 e = self.expression( 4504 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4505 ) 4506 4507 if isinstance(e.this, exp.Column): 4508 e.this.replace(e.this.this) 4509 4510 transformed.append(e) 4511 4512 return transformed 4513 4514 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4515 return self._parse_column_def(self._parse_id_var()) 4516 4517 def _parse_user_defined_function( 4518 self, kind: t.Optional[TokenType] = None 4519 ) -> t.Optional[exp.Expression]: 4520 this = self._parse_id_var() 4521 4522 while self._match(TokenType.DOT): 4523 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4524 4525 if not self._match(TokenType.L_PAREN): 4526 return this 4527 4528 expressions = self._parse_csv(self._parse_function_parameter) 4529 self._match_r_paren() 4530 return self.expression( 4531 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4532 ) 4533 4534 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4535 literal = self._parse_primary() 4536 if literal: 4537 return self.expression(exp.Introducer, this=token.text, expression=literal) 4538 4539 return self.expression(exp.Identifier, this=token.text) 4540 4541 def _parse_session_parameter(self) -> exp.SessionParameter: 4542 kind = None 4543 this = self._parse_id_var() or self._parse_primary() 4544 4545 if this and self._match(TokenType.DOT): 4546 kind = this.name 4547 this = self._parse_var() or self._parse_primary() 4548 4549 return self.expression(exp.SessionParameter, this=this, kind=kind) 4550 4551 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4552 index = self._index 4553 4554 if self._match(TokenType.L_PAREN): 4555 expressions = t.cast( 4556 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4557 ) 4558 4559 if not self._match(TokenType.R_PAREN): 4560 self._retreat(index) 4561 else: 4562 expressions = [self._parse_id_var()] 4563 4564 if self._match_set(self.LAMBDAS): 4565 return self.LAMBDAS[self._prev.token_type](self, expressions) 4566 4567 self._retreat(index) 4568 4569 this: t.Optional[exp.Expression] 4570 4571 if self._match(TokenType.DISTINCT): 4572 this = self.expression( 4573 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4574 ) 4575 else: 4576 this = self._parse_select_or_expression(alias=alias) 4577 4578 return self._parse_limit( 4579 
self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4580 ) 4581 4582 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4583 index = self._index 4584 if not self._match(TokenType.L_PAREN): 4585 return this 4586 4587 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 4588 # expr can be of both types 4589 if self._match_set(self.SELECT_START_TOKENS): 4590 self._retreat(index) 4591 return this 4592 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4593 self._match_r_paren() 4594 return self.expression(exp.Schema, this=this, expressions=args) 4595 4596 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4597 return self._parse_column_def(self._parse_field(any_token=True)) 4598 4599 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4600 # column defs are not really columns, they're identifiers 4601 if isinstance(this, exp.Column): 4602 this = this.this 4603 4604 kind = self._parse_types(schema=True) 4605 4606 if self._match_text_seq("FOR", "ORDINALITY"): 4607 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4608 4609 constraints: t.List[exp.Expression] = [] 4610 4611 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4612 ("ALIAS", "MATERIALIZED") 4613 ): 4614 persisted = self._prev.text.upper() == "MATERIALIZED" 4615 constraints.append( 4616 self.expression( 4617 exp.ComputedColumnConstraint, 4618 this=self._parse_conjunction(), 4619 persisted=persisted or self._match_text_seq("PERSISTED"), 4620 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4621 ) 4622 ) 4623 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4624 self._match(TokenType.ALIAS) 4625 constraints.append( 4626 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4627 ) 4628 4629 while True: 4630 constraint = self._parse_column_constraint() 4631 if not constraint: 4632 break 4633 constraints.append(constraint) 4634 4635 if not kind and not constraints: 4636 return this 4637 4638 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4639 4640 def _parse_auto_increment( 4641 self, 4642 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4643 start = None 4644 increment = None 4645 4646 if self._match(TokenType.L_PAREN, advance=False): 4647 args = self._parse_wrapped_csv(self._parse_bitwise) 4648 start = seq_get(args, 0) 4649 increment = seq_get(args, 1) 4650 elif self._match_text_seq("START"): 4651 start = self._parse_bitwise() 4652 self._match_text_seq("INCREMENT") 4653 increment = self._parse_bitwise() 4654 4655 if start and increment: 4656 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4657 4658 return exp.AutoIncrementColumnConstraint() 4659 4660 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4661 if not self._match_text_seq("REFRESH"): 4662 self._retreat(self._index - 1) 4663 return None 4664 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4665 4666 def _parse_compress(self) -> exp.CompressColumnConstraint: 4667 if self._match(TokenType.L_PAREN, advance=False): 4668 return self.expression( 4669 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4670 ) 4671 4672 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4673 4674 def 
_parse_generated_as_identity( 4675 self, 4676 ) -> ( 4677 exp.GeneratedAsIdentityColumnConstraint 4678 | exp.ComputedColumnConstraint 4679 | exp.GeneratedAsRowColumnConstraint 4680 ): 4681 if self._match_text_seq("BY", "DEFAULT"): 4682 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4683 this = self.expression( 4684 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4685 ) 4686 else: 4687 self._match_text_seq("ALWAYS") 4688 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4689 4690 self._match(TokenType.ALIAS) 4691 4692 if self._match_text_seq("ROW"): 4693 start = self._match_text_seq("START") 4694 if not start: 4695 self._match(TokenType.END) 4696 hidden = self._match_text_seq("HIDDEN") 4697 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4698 4699 identity = self._match_text_seq("IDENTITY") 4700 4701 if self._match(TokenType.L_PAREN): 4702 if self._match(TokenType.START_WITH): 4703 this.set("start", self._parse_bitwise()) 4704 if self._match_text_seq("INCREMENT", "BY"): 4705 this.set("increment", self._parse_bitwise()) 4706 if self._match_text_seq("MINVALUE"): 4707 this.set("minvalue", self._parse_bitwise()) 4708 if self._match_text_seq("MAXVALUE"): 4709 this.set("maxvalue", self._parse_bitwise()) 4710 4711 if self._match_text_seq("CYCLE"): 4712 this.set("cycle", True) 4713 elif self._match_text_seq("NO", "CYCLE"): 4714 this.set("cycle", False) 4715 4716 if not identity: 4717 this.set("expression", self._parse_range()) 4718 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4719 args = self._parse_csv(self._parse_bitwise) 4720 this.set("start", seq_get(args, 0)) 4721 this.set("increment", seq_get(args, 1)) 4722 4723 self._match_r_paren() 4724 4725 return this 4726 4727 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4728 self._match_text_seq("LENGTH") 4729 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4730 4731 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4732 if self._match_text_seq("NULL"): 4733 return self.expression(exp.NotNullColumnConstraint) 4734 if self._match_text_seq("CASESPECIFIC"): 4735 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4736 if self._match_text_seq("FOR", "REPLICATION"): 4737 return self.expression(exp.NotForReplicationColumnConstraint) 4738 return None 4739 4740 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4741 if self._match(TokenType.CONSTRAINT): 4742 this = self._parse_id_var() 4743 else: 4744 this = None 4745 4746 if self._match_texts(self.CONSTRAINT_PARSERS): 4747 return self.expression( 4748 exp.ColumnConstraint, 4749 this=this, 4750 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4751 ) 4752 4753 return this 4754 4755 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4756 if not self._match(TokenType.CONSTRAINT): 4757 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4758 4759 return self.expression( 4760 exp.Constraint, 4761 this=self._parse_id_var(), 4762 expressions=self._parse_unnamed_constraints(), 4763 ) 4764 4765 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 4766 constraints = [] 4767 while True: 4768 constraint = self._parse_unnamed_constraint() or self._parse_function() 4769 if not constraint: 4770 break 4771 constraints.append(constraint) 4772 4773 return constraints 4774 4775 def _parse_unnamed_constraint( 4776 self, constraints: 
t.Optional[t.Collection[str]] = None 4777 ) -> t.Optional[exp.Expression]: 4778 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4779 constraints or self.CONSTRAINT_PARSERS 4780 ): 4781 return None 4782 4783 constraint = self._prev.text.upper() 4784 if constraint not in self.CONSTRAINT_PARSERS: 4785 self.raise_error(f"No parser found for schema constraint {constraint}.") 4786 4787 return self.CONSTRAINT_PARSERS[constraint](self) 4788 4789 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4790 self._match_text_seq("KEY") 4791 return self.expression( 4792 exp.UniqueColumnConstraint, 4793 this=self._parse_schema(self._parse_id_var(any_token=False)), 4794 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4795 on_conflict=self._parse_on_conflict(), 4796 ) 4797 4798 def _parse_key_constraint_options(self) -> t.List[str]: 4799 options = [] 4800 while True: 4801 if not self._curr: 4802 break 4803 4804 if self._match(TokenType.ON): 4805 action = None 4806 on = self._advance_any() and self._prev.text 4807 4808 if self._match_text_seq("NO", "ACTION"): 4809 action = "NO ACTION" 4810 elif self._match_text_seq("CASCADE"): 4811 action = "CASCADE" 4812 elif self._match_text_seq("RESTRICT"): 4813 action = "RESTRICT" 4814 elif self._match_pair(TokenType.SET, TokenType.NULL): 4815 action = "SET NULL" 4816 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4817 action = "SET DEFAULT" 4818 else: 4819 self.raise_error("Invalid key constraint") 4820 4821 options.append(f"ON {on} {action}") 4822 elif self._match_text_seq("NOT", "ENFORCED"): 4823 options.append("NOT ENFORCED") 4824 elif self._match_text_seq("DEFERRABLE"): 4825 options.append("DEFERRABLE") 4826 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4827 options.append("INITIALLY DEFERRED") 4828 elif self._match_text_seq("NORELY"): 4829 options.append("NORELY") 4830 elif self._match_text_seq("MATCH", "FULL"): 4831 options.append("MATCH FULL") 4832 else: 4833 break 4834 4835 return options 4836 4837 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4838 if match and not self._match(TokenType.REFERENCES): 4839 return None 4840 4841 expressions = None 4842 this = self._parse_table(schema=True) 4843 options = self._parse_key_constraint_options() 4844 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4845 4846 def _parse_foreign_key(self) -> exp.ForeignKey: 4847 expressions = self._parse_wrapped_id_vars() 4848 reference = self._parse_references() 4849 options = {} 4850 4851 while self._match(TokenType.ON): 4852 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4853 self.raise_error("Expected DELETE or UPDATE") 4854 4855 kind = self._prev.text.lower() 4856 4857 if self._match_text_seq("NO", "ACTION"): 4858 action = "NO ACTION" 4859 elif self._match(TokenType.SET): 4860 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4861 action = "SET " + self._prev.text.upper() 4862 else: 4863 self._advance() 4864 action = self._prev.text.upper() 4865 4866 options[kind] = action 4867 4868 return self.expression( 4869 exp.ForeignKey, 4870 expressions=expressions, 4871 reference=reference, 4872 **options, # type: ignore 4873 ) 4874 4875 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4876 return self._parse_field() 4877 4878 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4879 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4880 self._retreat(self._index - 
1) 4881 return None 4882 4883 id_vars = self._parse_wrapped_id_vars() 4884 return self.expression( 4885 exp.PeriodForSystemTimeConstraint, 4886 this=seq_get(id_vars, 0), 4887 expression=seq_get(id_vars, 1), 4888 ) 4889 4890 def _parse_primary_key( 4891 self, wrapped_optional: bool = False, in_props: bool = False 4892 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4893 desc = ( 4894 self._match_set((TokenType.ASC, TokenType.DESC)) 4895 and self._prev.token_type == TokenType.DESC 4896 ) 4897 4898 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4899 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4900 4901 expressions = self._parse_wrapped_csv( 4902 self._parse_primary_key_part, optional=wrapped_optional 4903 ) 4904 options = self._parse_key_constraint_options() 4905 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4906 4907 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4908 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4909 4910 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4911 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4912 return this 4913 4914 bracket_kind = self._prev.token_type 4915 expressions = self._parse_csv( 4916 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4917 ) 4918 4919 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 4920 self.raise_error("Expected ]") 4921 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 4922 self.raise_error("Expected }") 4923 4924 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4925 if bracket_kind == TokenType.L_BRACE: 4926 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 4927 elif not this or this.name.upper() == "ARRAY": 4928 this = self.expression(exp.Array, expressions=expressions) 4929 else: 4930 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4931 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4932 4933 self._add_comments(this) 4934 return self._parse_bracket(this) 4935 4936 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4937 if self._match(TokenType.COLON): 4938 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4939 return this 4940 4941 def _parse_case(self) -> t.Optional[exp.Expression]: 4942 ifs = [] 4943 default = None 4944 4945 comments = self._prev_comments 4946 expression = self._parse_conjunction() 4947 4948 while self._match(TokenType.WHEN): 4949 this = self._parse_conjunction() 4950 self._match(TokenType.THEN) 4951 then = self._parse_conjunction() 4952 ifs.append(self.expression(exp.If, this=this, true=then)) 4953 4954 if self._match(TokenType.ELSE): 4955 default = self._parse_conjunction() 4956 4957 if not self._match(TokenType.END): 4958 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4959 default = exp.column("interval") 4960 else: 4961 self.raise_error("Expected END after CASE", self._prev) 4962 4963 return self.expression( 4964 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 4965 ) 4966 4967 def _parse_if(self) -> t.Optional[exp.Expression]: 4968 if self._match(TokenType.L_PAREN): 4969 args = self._parse_csv(self._parse_conjunction) 4970 this = 
self.validate_expression(exp.If.from_arg_list(args), args) 4971 self._match_r_paren() 4972 else: 4973 index = self._index - 1 4974 4975 if self.NO_PAREN_IF_COMMANDS and index == 0: 4976 return self._parse_as_command(self._prev) 4977 4978 condition = self._parse_conjunction() 4979 4980 if not condition: 4981 self._retreat(index) 4982 return None 4983 4984 self._match(TokenType.THEN) 4985 true = self._parse_conjunction() 4986 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4987 self._match(TokenType.END) 4988 this = self.expression(exp.If, this=condition, true=true, false=false) 4989 4990 return this 4991 4992 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4993 if not self._match_text_seq("VALUE", "FOR"): 4994 self._retreat(self._index - 1) 4995 return None 4996 4997 return self.expression( 4998 exp.NextValueFor, 4999 this=self._parse_column(), 5000 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5001 ) 5002 5003 def _parse_extract(self) -> exp.Extract: 5004 this = self._parse_function() or self._parse_var() or self._parse_type() 5005 5006 if self._match(TokenType.FROM): 5007 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5008 5009 if not self._match(TokenType.COMMA): 5010 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5011 5012 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5013 5014 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5015 this = self._parse_conjunction() 5016 5017 if not self._match(TokenType.ALIAS): 5018 if self._match(TokenType.COMMA): 5019 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5020 5021 self.raise_error("Expected AS after CAST") 5022 5023 fmt = None 5024 to = self._parse_types() 5025 5026 if self._match(TokenType.FORMAT): 5027 fmt_string = self._parse_string() 5028 fmt = self._parse_at_time_zone(fmt_string) 5029 5030 if not to: 5031 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5032 if to.this in exp.DataType.TEMPORAL_TYPES: 5033 this = self.expression( 5034 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5035 this=this, 5036 format=exp.Literal.string( 5037 format_time( 5038 fmt_string.this if fmt_string else "", 5039 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5040 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5041 ) 5042 ), 5043 ) 5044 5045 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5046 this.set("zone", fmt.args["zone"]) 5047 return this 5048 elif not to: 5049 self.raise_error("Expected TYPE after CAST") 5050 elif isinstance(to, exp.Identifier): 5051 to = exp.DataType.build(to.name, udt=True) 5052 elif to.this == exp.DataType.Type.CHAR: 5053 if self._match(TokenType.CHARACTER_SET): 5054 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5055 5056 return self.expression( 5057 exp.Cast if strict else exp.TryCast, 5058 this=this, 5059 to=to, 5060 format=fmt, 5061 safe=safe, 5062 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5063 ) 5064 5065 def _parse_string_agg(self) -> exp.Expression: 5066 if self._match(TokenType.DISTINCT): 5067 args: t.List[t.Optional[exp.Expression]] = [ 5068 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 5069 ] 5070 if self._match(TokenType.COMMA): 5071 args.extend(self._parse_csv(self._parse_conjunction)) 5072 else: 5073 args = 
self._parse_csv(self._parse_conjunction) # type: ignore 5074 5075 index = self._index 5076 if not self._match(TokenType.R_PAREN) and args: 5077 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5078 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5079 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5080 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5081 5082 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5083 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5084 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5085 if not self._match_text_seq("WITHIN", "GROUP"): 5086 self._retreat(index) 5087 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5088 5089 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5090 order = self._parse_order(this=seq_get(args, 0)) 5091 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5092 5093 def _parse_convert( 5094 self, strict: bool, safe: t.Optional[bool] = None 5095 ) -> t.Optional[exp.Expression]: 5096 this = self._parse_bitwise() 5097 5098 if self._match(TokenType.USING): 5099 to: t.Optional[exp.Expression] = self.expression( 5100 exp.CharacterSet, this=self._parse_var() 5101 ) 5102 elif self._match(TokenType.COMMA): 5103 to = self._parse_types() 5104 else: 5105 to = None 5106 5107 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5108 5109 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5110 """ 5111 There are generally two variants of the DECODE function: 5112 5113 - DECODE(bin, charset) 5114 - DECODE(expression, search, result [, search, result] ... [, default]) 5115 5116 The second variant will always be parsed into a CASE expression. Note that NULL 5117 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5118 instead of relying on pattern matching. 
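For illustration (an editorial example, not upstream documentation):
DECODE(x, 1, 'one', NULL, 'none', 'other') is parsed into the equivalent of
CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'none' ELSE 'other' END.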
5119 """ 5120 args = self._parse_csv(self._parse_conjunction) 5121 5122 if len(args) < 3: 5123 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5124 5125 expression, *expressions = args 5126 if not expression: 5127 return None 5128 5129 ifs = [] 5130 for search, result in zip(expressions[::2], expressions[1::2]): 5131 if not search or not result: 5132 return None 5133 5134 if isinstance(search, exp.Literal): 5135 ifs.append( 5136 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5137 ) 5138 elif isinstance(search, exp.Null): 5139 ifs.append( 5140 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5141 ) 5142 else: 5143 cond = exp.or_( 5144 exp.EQ(this=expression.copy(), expression=search), 5145 exp.and_( 5146 exp.Is(this=expression.copy(), expression=exp.Null()), 5147 exp.Is(this=search.copy(), expression=exp.Null()), 5148 copy=False, 5149 ), 5150 copy=False, 5151 ) 5152 ifs.append(exp.If(this=cond, true=result)) 5153 5154 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5155 5156 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5157 self._match_text_seq("KEY") 5158 key = self._parse_column() 5159 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5160 self._match_text_seq("VALUE") 5161 value = self._parse_bitwise() 5162 5163 if not key and not value: 5164 return None 5165 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5166 5167 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5168 if not this or not self._match_text_seq("FORMAT", "JSON"): 5169 return this 5170 5171 return self.expression(exp.FormatJson, this=this) 5172 5173 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5174 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5175 for value in values: 5176 if self._match_text_seq(value, "ON", on): 5177 return f"{value} ON {on}" 5178 5179 return None 5180 5181 @t.overload 5182 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5183 5184 @t.overload 5185 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5186 5187 def _parse_json_object(self, agg=False): 5188 star = self._parse_star() 5189 expressions = ( 5190 [star] 5191 if star 5192 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5193 ) 5194 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5195 5196 unique_keys = None 5197 if self._match_text_seq("WITH", "UNIQUE"): 5198 unique_keys = True 5199 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5200 unique_keys = False 5201 5202 self._match_text_seq("KEYS") 5203 5204 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5205 self._parse_type() 5206 ) 5207 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5208 5209 return self.expression( 5210 exp.JSONObjectAgg if agg else exp.JSONObject, 5211 expressions=expressions, 5212 null_handling=null_handling, 5213 unique_keys=unique_keys, 5214 return_type=return_type, 5215 encoding=encoding, 5216 ) 5217 5218 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5219 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5220 if not self._match_text_seq("NESTED"): 5221 this = self._parse_id_var() 5222 kind = self._parse_types(allow_identifiers=False) 5223 nested = None 5224 else: 5225 this = None 5226 kind = None 5227 nested = True 5228 5229 path = self._match_text_seq("PATH") and self._parse_string() 5230 nested_schema = nested and self._parse_json_schema() 5231 5232 return self.expression( 5233 exp.JSONColumnDef, 5234 this=this, 5235 kind=kind, 5236 path=path, 5237 nested_schema=nested_schema, 5238 ) 5239 5240 def _parse_json_schema(self) -> exp.JSONSchema: 5241 self._match_text_seq("COLUMNS") 5242 return self.expression( 5243 exp.JSONSchema, 5244 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5245 ) 5246 5247 def _parse_json_table(self) -> exp.JSONTable: 5248 this = self._parse_format_json(self._parse_bitwise()) 5249 path = self._match(TokenType.COMMA) and self._parse_string() 5250 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5251 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5252 schema = self._parse_json_schema() 5253 5254 return exp.JSONTable( 5255 this=this, 5256 schema=schema, 5257 path=path, 5258 error_handling=error_handling, 5259 empty_handling=empty_handling, 5260 ) 5261 5262 def _parse_match_against(self) -> exp.MatchAgainst: 5263 expressions = self._parse_csv(self._parse_column) 5264 5265 self._match_text_seq(")", "AGAINST", "(") 5266 5267 this = self._parse_string() 5268 5269 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5270 modifier = "IN NATURAL LANGUAGE MODE" 5271 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5272 modifier = f"{modifier} WITH QUERY EXPANSION" 5273 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5274 modifier = "IN BOOLEAN MODE" 5275 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5276 modifier = "WITH QUERY EXPANSION" 5277 else: 5278 modifier = None 5279 5280 return self.expression( 5281 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5282 ) 5283 5284 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5285 def _parse_open_json(self) -> exp.OpenJSON: 5286 this = self._parse_bitwise() 5287 path = self._match(TokenType.COMMA) and self._parse_string() 5288 5289 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5290 this = self._parse_field(any_token=True) 5291 kind = self._parse_types() 5292 path = 
self._parse_string() 5293 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5294 5295 return self.expression( 5296 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5297 ) 5298 5299 expressions = None 5300 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5301 self._match_l_paren() 5302 expressions = self._parse_csv(_parse_open_json_column_def) 5303 5304 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5305 5306 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5307 args = self._parse_csv(self._parse_bitwise) 5308 5309 if self._match(TokenType.IN): 5310 return self.expression( 5311 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5312 ) 5313 5314 if haystack_first: 5315 haystack = seq_get(args, 0) 5316 needle = seq_get(args, 1) 5317 else: 5318 needle = seq_get(args, 0) 5319 haystack = seq_get(args, 1) 5320 5321 return self.expression( 5322 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5323 ) 5324 5325 def _parse_predict(self) -> exp.Predict: 5326 self._match_text_seq("MODEL") 5327 this = self._parse_table() 5328 5329 self._match(TokenType.COMMA) 5330 self._match_text_seq("TABLE") 5331 5332 return self.expression( 5333 exp.Predict, 5334 this=this, 5335 expression=self._parse_table(), 5336 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5337 ) 5338 5339 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5340 args = self._parse_csv(self._parse_table) 5341 return exp.JoinHint(this=func_name.upper(), expressions=args) 5342 5343 def _parse_substring(self) -> exp.Substring: 5344 # Postgres supports the form: substring(string [from int] [for int]) 5345 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5346 5347 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5348 5349 if self._match(TokenType.FROM): 5350 args.append(self._parse_bitwise()) 5351 if self._match(TokenType.FOR): 5352 if len(args) == 1: 5353 args.append(exp.Literal.number(1)) 5354 args.append(self._parse_bitwise()) 5355 5356 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5357 5358 def _parse_trim(self) -> exp.Trim: 5359 # https://www.w3resource.com/sql/character-functions/trim.php 5360 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5361 5362 position = None 5363 collation = None 5364 expression = None 5365 5366 if self._match_texts(self.TRIM_TYPES): 5367 position = self._prev.text.upper() 5368 5369 this = self._parse_bitwise() 5370 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5371 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5372 expression = self._parse_bitwise() 5373 5374 if invert_order: 5375 this, expression = expression, this 5376 5377 if self._match(TokenType.COLLATE): 5378 collation = self._parse_bitwise() 5379 5380 return self.expression( 5381 exp.Trim, this=this, position=position, expression=expression, collation=collation 5382 ) 5383 5384 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5385 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5386 5387 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5388 return self._parse_window(self._parse_id_var(), alias=True) 5389 5390 def _parse_respect_or_ignore_nulls( 5391 self, this: t.Optional[exp.Expression] 5392 ) -> t.Optional[exp.Expression]: 5393 if self._match_text_seq("IGNORE", "NULLS"): 
5394 return self.expression(exp.IgnoreNulls, this=this) 5395 if self._match_text_seq("RESPECT", "NULLS"): 5396 return self.expression(exp.RespectNulls, this=this) 5397 return this 5398 5399 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5400 if self._match(TokenType.HAVING): 5401 self._match_texts(("MAX", "MIN")) 5402 max = self._prev.text.upper() != "MIN" 5403 return self.expression( 5404 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5405 ) 5406 5407 return this 5408 5409 def _parse_window( 5410 self, this: t.Optional[exp.Expression], alias: bool = False 5411 ) -> t.Optional[exp.Expression]: 5412 func = this 5413 comments = func.comments if isinstance(func, exp.Expression) else None 5414 5415 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5416 self._match(TokenType.WHERE) 5417 this = self.expression( 5418 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5419 ) 5420 self._match_r_paren() 5421 5422 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5423 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5424 if self._match_text_seq("WITHIN", "GROUP"): 5425 order = self._parse_wrapped(self._parse_order) 5426 this = self.expression(exp.WithinGroup, this=this, expression=order) 5427 5428 # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER clause; 5429 # some dialects choose to implement it and some do not. 5430 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5431 5432 # The code above in _parse_lambda handles 5433 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5434 5435 # The code below handles 5436 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5437 5438 # Oracle allows both formats 5439 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5440 # and Snowflake chose to do the same for familiarity: 5441 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5442 if isinstance(this, exp.AggFunc): 5443 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5444 5445 if ignore_respect and ignore_respect is not this: 5446 ignore_respect.replace(ignore_respect.this) 5447 this = self.expression(ignore_respect.__class__, this=this) 5448 5449 this = self._parse_respect_or_ignore_nulls(this) 5450 5451 # BigQuery named windows, e.g. SELECT ... FROM t WINDOW x AS (PARTITION BY ...)
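# e.g. SELECT SUM(x) FROM t WINDOW w AS (PARTITION BY y): with alias=True the AS
# keyword is consumed and no OVER keyword is expected, so `over` stays None below.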
5452 if alias: 5453 over = None 5454 self._match(TokenType.ALIAS) 5455 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5456 return this 5457 else: 5458 over = self._prev.text.upper() 5459 5460 if comments and isinstance(func, exp.Expression): 5461 func.pop_comments() 5462 5463 if not self._match(TokenType.L_PAREN): 5464 return self.expression( 5465 exp.Window, 5466 comments=comments, 5467 this=this, 5468 alias=self._parse_id_var(False), 5469 over=over, 5470 ) 5471 5472 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5473 5474 first = self._match(TokenType.FIRST) 5475 if self._match_text_seq("LAST"): 5476 first = False 5477 5478 partition, order = self._parse_partition_and_order() 5479 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5480 5481 if kind: 5482 self._match(TokenType.BETWEEN) 5483 start = self._parse_window_spec() 5484 self._match(TokenType.AND) 5485 end = self._parse_window_spec() 5486 5487 spec = self.expression( 5488 exp.WindowSpec, 5489 kind=kind, 5490 start=start["value"], 5491 start_side=start["side"], 5492 end=end["value"], 5493 end_side=end["side"], 5494 ) 5495 else: 5496 spec = None 5497 5498 self._match_r_paren() 5499 5500 window = self.expression( 5501 exp.Window, 5502 comments=comments, 5503 this=this, 5504 partition_by=partition, 5505 order=order, 5506 spec=spec, 5507 alias=window_alias, 5508 over=over, 5509 first=first, 5510 ) 5511 5512 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5513 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5514 return self._parse_window(window, alias=alias) 5515 5516 return window 5517 5518 def _parse_partition_and_order( 5519 self, 5520 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5521 return self._parse_partition_by(), self._parse_order() 5522 5523 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5524 self._match(TokenType.BETWEEN) 5525 5526 return { 5527 "value": ( 5528 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5529 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5530 or self._parse_bitwise() 5531 ), 5532 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5533 } 5534 5535 def _parse_alias( 5536 self, this: t.Optional[exp.Expression], explicit: bool = False 5537 ) -> t.Optional[exp.Expression]: 5538 any_token = self._match(TokenType.ALIAS) 5539 comments = self._prev_comments or [] 5540 5541 if explicit and not any_token: 5542 return this 5543 5544 if self._match(TokenType.L_PAREN): 5545 aliases = self.expression( 5546 exp.Aliases, 5547 comments=comments, 5548 this=this, 5549 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5550 ) 5551 self._match_r_paren(aliases) 5552 return aliases 5553 5554 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5555 self.STRING_ALIASES and self._parse_string_as_identifier() 5556 ) 5557 5558 if alias: 5559 comments.extend(alias.pop_comments()) 5560 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5561 column = this.this 5562 5563 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5564 if not this.comments and column and column.comments: 5565 this.comments = column.pop_comments() 5566 5567 return this 5568 5569 def _parse_id_var( 5570 self, 5571 any_token: bool = True, 5572 tokens: t.Optional[t.Collection[TokenType]] = None, 5573 ) -> t.Optional[exp.Expression]: 5574 expression = self._parse_identifier() 5575 if 
not expression and ( 5576 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5577 ): 5578 quoted = self._prev.token_type == TokenType.STRING 5579 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5580 5581 return expression 5582 5583 def _parse_string(self) -> t.Optional[exp.Expression]: 5584 if self._match_set(self.STRING_PARSERS): 5585 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5586 return self._parse_placeholder() 5587 5588 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5589 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5590 5591 def _parse_number(self) -> t.Optional[exp.Expression]: 5592 if self._match_set(self.NUMERIC_PARSERS): 5593 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5594 return self._parse_placeholder() 5595 5596 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5597 if self._match(TokenType.IDENTIFIER): 5598 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5599 return self._parse_placeholder() 5600 5601 def _parse_var( 5602 self, 5603 any_token: bool = False, 5604 tokens: t.Optional[t.Collection[TokenType]] = None, 5605 upper: bool = False, 5606 ) -> t.Optional[exp.Expression]: 5607 if ( 5608 (any_token and self._advance_any()) 5609 or self._match(TokenType.VAR) 5610 or (self._match_set(tokens) if tokens else False) 5611 ): 5612 return self.expression( 5613 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5614 ) 5615 return self._parse_placeholder() 5616 5617 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5618 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5619 self._advance() 5620 return self._prev 5621 return None 5622 5623 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5624 return self._parse_var() or self._parse_string() 5625 5626 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5627 return self._parse_primary() or self._parse_var(any_token=True) 5628 5629 def _parse_null(self) -> t.Optional[exp.Expression]: 5630 if self._match_set(self.NULL_TOKENS): 5631 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5632 return self._parse_placeholder() 5633 5634 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5635 if self._match(TokenType.TRUE): 5636 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5637 if self._match(TokenType.FALSE): 5638 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5639 return self._parse_placeholder() 5640 5641 def _parse_star(self) -> t.Optional[exp.Expression]: 5642 if self._match(TokenType.STAR): 5643 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5644 return self._parse_placeholder() 5645 5646 def _parse_parameter(self) -> exp.Parameter: 5647 self._match(TokenType.L_BRACE) 5648 this = self._parse_identifier() or self._parse_primary_or_var() 5649 expression = self._match(TokenType.COLON) and ( 5650 self._parse_identifier() or self._parse_primary_or_var() 5651 ) 5652 self._match(TokenType.R_BRACE) 5653 return self.expression(exp.Parameter, this=this, expression=expression) 5654 5655 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5656 if self._match_set(self.PLACEHOLDER_PARSERS): 5657 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5658 if placeholder: 5659 return placeholder 5660 self._advance(-1) 5661 return None 5662 5663 def 
_parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5664 if not self._match(TokenType.EXCEPT): 5665 return None 5666 if self._match(TokenType.L_PAREN, advance=False): 5667 return self._parse_wrapped_csv(self._parse_column) 5668 5669 except_column = self._parse_column() 5670 return [except_column] if except_column else None 5671 5672 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5673 if not self._match(TokenType.REPLACE): 5674 return None 5675 if self._match(TokenType.L_PAREN, advance=False): 5676 return self._parse_wrapped_csv(self._parse_expression) 5677 5678 replace_expression = self._parse_expression() 5679 return [replace_expression] if replace_expression else None 5680 5681 def _parse_csv( 5682 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5683 ) -> t.List[exp.Expression]: 5684 parse_result = parse_method() 5685 items = [parse_result] if parse_result is not None else [] 5686 5687 while self._match(sep): 5688 self._add_comments(parse_result) 5689 parse_result = parse_method() 5690 if parse_result is not None: 5691 items.append(parse_result) 5692 5693 return items 5694 5695 def _parse_tokens( 5696 self, parse_method: t.Callable, expressions: t.Dict 5697 ) -> t.Optional[exp.Expression]: 5698 this = parse_method() 5699 5700 while self._match_set(expressions): 5701 this = self.expression( 5702 expressions[self._prev.token_type], 5703 this=this, 5704 comments=self._prev_comments, 5705 expression=parse_method(), 5706 ) 5707 5708 return this 5709 5710 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5711 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5712 5713 def _parse_wrapped_csv( 5714 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5715 ) -> t.List[exp.Expression]: 5716 return self._parse_wrapped( 5717 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5718 ) 5719 5720 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5721 wrapped = self._match(TokenType.L_PAREN) 5722 if not wrapped and not optional: 5723 self.raise_error("Expecting (") 5724 parse_result = parse_method() 5725 if wrapped: 5726 self._match_r_paren() 5727 return parse_result 5728 5729 def _parse_expressions(self) -> t.List[exp.Expression]: 5730 return self._parse_csv(self._parse_expression) 5731 5732 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5733 return self._parse_select() or self._parse_set_operations( 5734 self._parse_expression() if alias else self._parse_conjunction() 5735 ) 5736 5737 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5738 return self._parse_query_modifiers( 5739 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5740 ) 5741 5742 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5743 this = None 5744 if self._match_texts(self.TRANSACTION_KIND): 5745 this = self._prev.text 5746 5747 self._match_texts(("TRANSACTION", "WORK")) 5748 5749 modes = [] 5750 while True: 5751 mode = [] 5752 while self._match(TokenType.VAR): 5753 mode.append(self._prev.text) 5754 5755 if mode: 5756 modes.append(" ".join(mode)) 5757 if not self._match(TokenType.COMMA): 5758 break 5759 5760 return self.expression(exp.Transaction, this=this, modes=modes) 5761 5762 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5763 chain = None 5764 savepoint = None 5765 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5766 5767 
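# The grammar handled below is roughly:
#   {COMMIT | ROLLBACK} [TRANSACTION | WORK] [TO [SAVEPOINT] name] [AND [NO] CHAIN]
# Only ROLLBACK keeps the savepoint; only COMMIT keeps the chain flag.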
self._match_texts(("TRANSACTION", "WORK")) 5768 5769 if self._match_text_seq("TO"): 5770 self._match_text_seq("SAVEPOINT") 5771 savepoint = self._parse_id_var() 5772 5773 if self._match(TokenType.AND): 5774 chain = not self._match_text_seq("NO") 5775 self._match_text_seq("CHAIN") 5776 5777 if is_rollback: 5778 return self.expression(exp.Rollback, savepoint=savepoint) 5779 5780 return self.expression(exp.Commit, chain=chain) 5781 5782 def _parse_refresh(self) -> exp.Refresh: 5783 self._match(TokenType.TABLE) 5784 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5785 5786 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5787 if not self._match_text_seq("ADD"): 5788 return None 5789 5790 self._match(TokenType.COLUMN) 5791 exists_column = self._parse_exists(not_=True) 5792 expression = self._parse_field_def() 5793 5794 if expression: 5795 expression.set("exists", exists_column) 5796 5797 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5798 if self._match_texts(("FIRST", "AFTER")): 5799 position = self._prev.text 5800 column_position = self.expression( 5801 exp.ColumnPosition, this=self._parse_column(), position=position 5802 ) 5803 expression.set("position", column_position) 5804 5805 return expression 5806 5807 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5808 drop = self._match(TokenType.DROP) and self._parse_drop() 5809 if drop and not isinstance(drop, exp.Command): 5810 drop.set("kind", drop.args.get("kind", "COLUMN")) 5811 return drop 5812 5813 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5814 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5815 return self.expression( 5816 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5817 ) 5818 5819 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5820 index = self._index - 1 5821 5822 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5823 return self._parse_csv( 5824 lambda: self.expression( 5825 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5826 ) 5827 ) 5828 5829 self._retreat(index) 5830 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5831 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5832 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5833 5834 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 5835 if self._match_texts(self.ALTER_ALTER_PARSERS): 5836 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 5837 5838 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 5839 # keyword after ALTER we default to parsing this statement 5840 self._match(TokenType.COLUMN) 5841 column = self._parse_field(any_token=True) 5842 5843 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5844 return self.expression(exp.AlterColumn, this=column, drop=True) 5845 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5846 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5847 if self._match(TokenType.COMMENT): 5848 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5849 5850 self._match_text_seq("SET", "DATA") 5851 self._match_text_seq("TYPE") 5852 return self.expression( 5853 exp.AlterColumn, 5854 this=column, 5855 dtype=self._parse_types(), 5856 collate=self._match(TokenType.COLLATE) and 
self._parse_term(), 5857 using=self._match(TokenType.USING) and self._parse_conjunction(), 5858 ) 5859 5860 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 5861 if self._match_texts(("ALL", "EVEN", "AUTO")): 5862 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 5863 5864 self._match_text_seq("KEY", "DISTKEY") 5865 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 5866 5867 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 5868 if compound: 5869 self._match_text_seq("SORTKEY") 5870 5871 if self._match(TokenType.L_PAREN, advance=False): 5872 return self.expression( 5873 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 5874 ) 5875 5876 self._match_texts(("AUTO", "NONE")) 5877 return self.expression( 5878 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 5879 ) 5880 5881 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5882 index = self._index - 1 5883 5884 partition_exists = self._parse_exists() 5885 if self._match(TokenType.PARTITION, advance=False): 5886 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5887 5888 self._retreat(index) 5889 return self._parse_csv(self._parse_drop_column) 5890 5891 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5892 if self._match(TokenType.COLUMN): 5893 exists = self._parse_exists() 5894 old_column = self._parse_column() 5895 to = self._match_text_seq("TO") 5896 new_column = self._parse_column() 5897 5898 if old_column is None or to is None or new_column is None: 5899 return None 5900 5901 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5902 5903 self._match_text_seq("TO") 5904 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5905 5906 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5907 start = self._prev 5908 5909 if not self._match(TokenType.TABLE): 5910 return self._parse_as_command(start) 5911 5912 exists = self._parse_exists() 5913 only = self._match_text_seq("ONLY") 5914 this = self._parse_table(schema=True) 5915 5916 if self._next: 5917 self._advance() 5918 5919 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5920 if parser: 5921 actions = ensure_list(parser(self)) 5922 options = self._parse_csv(self._parse_property) 5923 5924 if not self._curr and actions: 5925 return self.expression( 5926 exp.AlterTable, 5927 this=this, 5928 exists=exists, 5929 actions=actions, 5930 only=only, 5931 options=options, 5932 ) 5933 5934 return self._parse_as_command(start) 5935 5936 def _parse_merge(self) -> exp.Merge: 5937 self._match(TokenType.INTO) 5938 target = self._parse_table() 5939 5940 if target and self._match(TokenType.ALIAS, advance=False): 5941 target.set("alias", self._parse_table_alias()) 5942 5943 self._match(TokenType.USING) 5944 using = self._parse_table() 5945 5946 self._match(TokenType.ON) 5947 on = self._parse_conjunction() 5948 5949 return self.expression( 5950 exp.Merge, 5951 this=target, 5952 using=using, 5953 on=on, 5954 expressions=self._parse_when_matched(), 5955 ) 5956 5957 def _parse_when_matched(self) -> t.List[exp.When]: 5958 whens = [] 5959 5960 while self._match(TokenType.WHEN): 5961 matched = not self._match(TokenType.NOT) 5962 self._match_text_seq("MATCHED") 5963 source = ( 5964 False 5965 if self._match_text_seq("BY", "TARGET") 5966 else self._match_text_seq("BY", "SOURCE") 5967 ) 5968 
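# At this point a clause such as WHEN NOT MATCHED BY SOURCE AND <cond> THEN ... has
# yielded matched=False and source=True; the optional AND condition is parsed next.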
condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5969 5970 self._match(TokenType.THEN) 5971 5972 if self._match(TokenType.INSERT): 5973 _this = self._parse_star() 5974 if _this: 5975 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5976 else: 5977 then = self.expression( 5978 exp.Insert, 5979 this=self._parse_value(), 5980 expression=self._match_text_seq("VALUES") and self._parse_value(), 5981 ) 5982 elif self._match(TokenType.UPDATE): 5983 expressions = self._parse_star() 5984 if expressions: 5985 then = self.expression(exp.Update, expressions=expressions) 5986 else: 5987 then = self.expression( 5988 exp.Update, 5989 expressions=self._match(TokenType.SET) 5990 and self._parse_csv(self._parse_equality), 5991 ) 5992 elif self._match(TokenType.DELETE): 5993 then = self.expression(exp.Var, this=self._prev.text) 5994 else: 5995 then = None 5996 5997 whens.append( 5998 self.expression( 5999 exp.When, 6000 matched=matched, 6001 source=source, 6002 condition=condition, 6003 then=then, 6004 ) 6005 ) 6006 return whens 6007 6008 def _parse_show(self) -> t.Optional[exp.Expression]: 6009 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6010 if parser: 6011 return parser(self) 6012 return self._parse_as_command(self._prev) 6013 6014 def _parse_set_item_assignment( 6015 self, kind: t.Optional[str] = None 6016 ) -> t.Optional[exp.Expression]: 6017 index = self._index 6018 6019 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6020 return self._parse_set_transaction(global_=kind == "GLOBAL") 6021 6022 left = self._parse_primary() or self._parse_column() 6023 assignment_delimiter = self._match_texts(("=", "TO")) 6024 6025 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6026 self._retreat(index) 6027 return None 6028 6029 right = self._parse_statement() or self._parse_id_var() 6030 this = self.expression(exp.EQ, this=left, expression=right) 6031 6032 return self.expression(exp.SetItem, this=this, kind=kind) 6033 6034 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6035 self._match_text_seq("TRANSACTION") 6036 characteristics = self._parse_csv( 6037 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6038 ) 6039 return self.expression( 6040 exp.SetItem, 6041 expressions=characteristics, 6042 kind="TRANSACTION", 6043 **{"global": global_}, # type: ignore 6044 ) 6045 6046 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6047 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6048 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6049 6050 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6051 index = self._index 6052 set_ = self.expression( 6053 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6054 ) 6055 6056 if self._curr: 6057 self._retreat(index) 6058 return self._parse_as_command(self._prev) 6059 6060 return set_ 6061 6062 def _parse_var_from_options( 6063 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6064 ) -> t.Optional[exp.Var]: 6065 start = self._curr 6066 if not start: 6067 return None 6068 6069 option = start.text.upper() 6070 continuations = options.get(option) 6071 6072 index = self._index 6073 self._advance() 6074 for keywords in continuations or []: 6075 if isinstance(keywords, str): 6076 keywords = (keywords,) 6077 6078 if self._match_text_seq(*keywords): 6079 option = f"{option} {' 
'.join(keywords)}" 6080 break 6081 else: 6082 if continuations or continuations is None: 6083 if raise_unmatched: 6084 self.raise_error(f"Unknown option {option}") 6085 6086 self._retreat(index) 6087 return None 6088 6089 return exp.var(option) 6090 6091 def _parse_as_command(self, start: Token) -> exp.Command: 6092 while self._curr: 6093 self._advance() 6094 text = self._find_sql(start, self._prev) 6095 size = len(start.text) 6096 self._warn_unsupported() 6097 return exp.Command(this=text[:size], expression=text[size:]) 6098 6099 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6100 settings = [] 6101 6102 self._match_l_paren() 6103 kind = self._parse_id_var() 6104 6105 if self._match(TokenType.L_PAREN): 6106 while True: 6107 key = self._parse_id_var() 6108 value = self._parse_primary() 6109 6110 if not key and value is None: 6111 break 6112 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6113 self._match(TokenType.R_PAREN) 6114 6115 self._match_r_paren() 6116 6117 return self.expression( 6118 exp.DictProperty, 6119 this=this, 6120 kind=kind.this if kind else None, 6121 settings=settings, 6122 ) 6123 6124 def _parse_dict_range(self, this: str) -> exp.DictRange: 6125 self._match_l_paren() 6126 has_min = self._match_text_seq("MIN") 6127 if has_min: 6128 min = self._parse_var() or self._parse_primary() 6129 self._match_text_seq("MAX") 6130 max = self._parse_var() or self._parse_primary() 6131 else: 6132 max = self._parse_var() or self._parse_primary() 6133 min = exp.Literal.number(0) 6134 self._match_r_paren() 6135 return self.expression(exp.DictRange, this=this, min=min, max=max) 6136 6137 def _parse_comprehension( 6138 self, this: t.Optional[exp.Expression] 6139 ) -> t.Optional[exp.Comprehension]: 6140 index = self._index 6141 expression = self._parse_column() 6142 if not self._match(TokenType.IN): 6143 self._retreat(index - 1) 6144 return None 6145 iterator = self._parse_column() 6146 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6147 return self.expression( 6148 exp.Comprehension, 6149 this=this, 6150 expression=expression, 6151 iterator=iterator, 6152 condition=condition, 6153 ) 6154 6155 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6156 if self._match(TokenType.HEREDOC_STRING): 6157 return self.expression(exp.Heredoc, this=self._prev.text) 6158 6159 if not self._match_text_seq("$"): 6160 return None 6161 6162 tags = ["$"] 6163 tag_text = None 6164 6165 if self._is_connected(): 6166 self._advance() 6167 tags.append(self._prev.text.upper()) 6168 else: 6169 self.raise_error("No closing $ found") 6170 6171 if tags[-1] != "$": 6172 if self._is_connected() and self._match_text_seq("$"): 6173 tag_text = tags[-1] 6174 tags.append("$") 6175 else: 6176 self.raise_error("No closing $ found") 6177 6178 heredoc_start = self._curr 6179 6180 while self._curr: 6181 if self._match_text_seq(*tags, advance=False): 6182 this = self._find_sql(heredoc_start, self._prev) 6183 self._advance(len(tags)) 6184 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6185 6186 self._advance() 6187 6188 self.raise_error(f"No closing {''.join(tags)} found") 6189 return None 6190 6191 def _find_parser( 6192 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6193 ) -> t.Optional[t.Callable]: 6194 if not self._curr: 6195 return None 6196 6197 index = self._index 6198 this = [] 6199 while True: 6200 # The current token might be multiple words 6201 curr = self._curr.text.upper() 6202 key = curr.split(" ") 6203 this.append(curr) 6204 
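# Walk the trie one (possibly multi-word) token at a time; TrieResult.EXISTS below
# means the keywords collected so far form a complete key registered in `parsers`.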
6205 self._advance() 6206 result, trie = in_trie(trie, key) 6207 if result == TrieResult.FAILED: 6208 break 6209 6210 if result == TrieResult.EXISTS: 6211 subparser = parsers[" ".join(this)] 6212 return subparser 6213 6214 self._retreat(index) 6215 return None 6216 6217 def _match(self, token_type, advance=True, expression=None): 6218 if not self._curr: 6219 return None 6220 6221 if self._curr.token_type == token_type: 6222 if advance: 6223 self._advance() 6224 self._add_comments(expression) 6225 return True 6226 6227 return None 6228 6229 def _match_set(self, types, advance=True): 6230 if not self._curr: 6231 return None 6232 6233 if self._curr.token_type in types: 6234 if advance: 6235 self._advance() 6236 return True 6237 6238 return None 6239 6240 def _match_pair(self, token_type_a, token_type_b, advance=True): 6241 if not self._curr or not self._next: 6242 return None 6243 6244 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6245 if advance: 6246 self._advance(2) 6247 return True 6248 6249 return None 6250 6251 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6252 if not self._match(TokenType.L_PAREN, expression=expression): 6253 self.raise_error("Expecting (") 6254 6255 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6256 if not self._match(TokenType.R_PAREN, expression=expression): 6257 self.raise_error("Expecting )") 6258 6259 def _match_texts(self, texts, advance=True): 6260 if self._curr and self._curr.text.upper() in texts: 6261 if advance: 6262 self._advance() 6263 return True 6264 return None 6265 6266 def _match_text_seq(self, *texts, advance=True): 6267 index = self._index 6268 for text in texts: 6269 if self._curr and self._curr.text.upper() == text: 6270 self._advance() 6271 else: 6272 self._retreat(index) 6273 return None 6274 6275 if not advance: 6276 self._retreat(index) 6277 6278 return True 6279 6280 def _replace_lambda( 6281 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6282 ) -> t.Optional[exp.Expression]: 6283 if not node: 6284 return node 6285 6286 for column in node.find_all(exp.Column): 6287 if column.parts[0].name in lambda_variables: 6288 dot_or_id = column.to_dot() if column.table else column.this 6289 parent = column.parent 6290 6291 while isinstance(parent, exp.Dot): 6292 if not isinstance(parent.parent, exp.Dot): 6293 parent.replace(dot_or_id) 6294 break 6295 parent = parent.parent 6296 else: 6297 if column is node: 6298 node = dot_or_id 6299 else: 6300 column.replace(dot_or_id) 6301 return node 6302 6303 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6304 start = self._prev 6305 6306 # Not to be confused with TRUNCATE(number, decimals) function call 6307 if self._match(TokenType.L_PAREN): 6308 self._retreat(self._index - 2) 6309 return self._parse_function() 6310 6311 # Clickhouse supports TRUNCATE DATABASE as well 6312 is_database = self._match(TokenType.DATABASE) 6313 6314 self._match(TokenType.TABLE) 6315 6316 exists = self._parse_exists(not_=False) 6317 6318 expressions = self._parse_csv( 6319 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6320 ) 6321 6322 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6323 6324 if self._match_text_seq("RESTART", "IDENTITY"): 6325 identity = "RESTART" 6326 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6327 identity = "CONTINUE" 6328 else: 6329 identity = None 6330 6331 if self._match_text_seq("CASCADE") or 
self._match_text_seq("RESTRICT"): 6332 option = self._prev.text 6333 else: 6334 option = None 6335 6336 partition = self._parse_partition() 6337 6338 # Fallback case 6339 if self._curr: 6340 return self._parse_as_command(start) 6341 6342 return self.expression( 6343 exp.TruncateTable, 6344 expressions=expressions, 6345 is_database=is_database, 6346 exists=exists, 6347 cluster=cluster, 6348 identity=identity, 6349 option=option, 6350 partition=partition, 6351 ) 6352 6353 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6354 this = self._parse_ordered(self._parse_opclass) 6355 6356 if not self._match(TokenType.WITH): 6357 return this 6358 6359 op = self._parse_var(any_token=True) 6360 6361 return self.expression(exp.WithOperator, this=this, op=op) 6362 6363 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6364 opts = [] 6365 self._match(TokenType.EQ) 6366 self._match(TokenType.L_PAREN) 6367 while self._curr and not self._match(TokenType.R_PAREN): 6368 opts.append(self._parse_conjunction()) 6369 self._match(TokenType.COMMA) 6370 return opts 6371 6372 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6373 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 6374 6375 options = [] 6376 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6377 option = self._parse_unquoted_field() 6378 value = None 6379 6380 # Some options are defined as functions with the values as params 6381 if not isinstance(option, exp.Func): 6382 prev = self._prev.text.upper() 6383 # Different dialects might separate options and values by white space, "=" and "AS" 6384 self._match(TokenType.EQ) 6385 self._match(TokenType.ALIAS) 6386 6387 if prev == "FILE_FORMAT" and self._match(TokenType.L_PAREN): 6388 # Snowflake FILE_FORMAT case 6389 value = self._parse_wrapped_options() 6390 else: 6391 value = self._parse_unquoted_field() 6392 6393 param = self.expression(exp.CopyParameter, this=option, expression=value) 6394 options.append(param) 6395 6396 if sep: 6397 self._match(sep) 6398 6399 return options 6400 6401 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6402 expr = self.expression(exp.Credentials) 6403 6404 if self._match_text_seq("STORAGE_INTEGRATION", advance=False): 6405 expr.set("storage", self._parse_conjunction()) 6406 if self._match_text_seq("CREDENTIALS"): 6407 # Snowflake supports CREDENTIALS = (...), while Redshift CREDENTIALS <string> 6408 creds = ( 6409 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6410 ) 6411 expr.set("credentials", creds) 6412 if self._match_text_seq("ENCRYPTION"): 6413 expr.set("encryption", self._parse_wrapped_options()) 6414 if self._match_text_seq("IAM_ROLE"): 6415 expr.set("iam_role", self._parse_field()) 6416 if self._match_text_seq("REGION"): 6417 expr.set("region", self._parse_field()) 6418 6419 return expr 6420 6421 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6422 return self._parse_field() 6423 6424 def _parse_copy(self) -> exp.Copy | exp.Command: 6425 start = self._prev 6426 6427 self._match(TokenType.INTO) 6428 6429 this = ( 6430 self._parse_conjunction() 6431 if self._match(TokenType.L_PAREN, advance=False) 6432 else self._parse_table(schema=True) 6433 ) 6434 6435 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6436 6437 files = self._parse_csv(self._parse_file_location) 6438 credentials = self._parse_credentials() 6439 6440 self._match_text_seq("WITH") 6441 6442 params = 
self._parse_wrapped(self._parse_copy_parameters, optional=True) 6443 6444 # Fallback case 6445 if self._curr: 6446 return self._parse_as_command(start) 6447 6448 return self.expression( 6449 exp.Copy, 6450 this=this, 6451 kind=kind, 6452 credentials=credentials, 6453 files=files, 6454 params=params, 6455 )
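A minimal usage sketch (not part of the module source): the public sqlglot entry points tokenize a SQL string and drive this Parser, so the behavior of methods like _parse_window and _parse_decode can be observed through the resulting AST. The snippet assumes the default dialect.

import sqlglot
from sqlglot import exp

# _parse_window wraps the aggregate call in an exp.Window node, with the
# PARTITION BY and ORDER BY parts stored as the `partition_by` and `order` args.
tree = sqlglot.parse_one("SELECT SUM(x) OVER (PARTITION BY y ORDER BY z) FROM t")
window = tree.find(exp.Window)
assert window is not None and isinstance(window.this, exp.Sum)
assert window.args.get("partition_by") and window.args.get("order")

# _parse_decode rewrites the multi-argument DECODE variant into a CASE expression,
# with the trailing odd argument becoming the CASE default.
case = sqlglot.parse_one("SELECT DECODE(x, 1, 'one', 'zero') FROM t").find(exp.Case)
assert case is not None and case.args.get("default") is not None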
783 ), 784 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 785 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 786 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 787 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 788 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 789 "LIKE": lambda self: self._parse_create_like(), 790 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 791 "LOCK": lambda self: self._parse_locking(), 792 "LOCKING": lambda self: self._parse_locking(), 793 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 794 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 795 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 796 "MODIFIES": lambda self: self._parse_modifies_property(), 797 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 798 "NO": lambda self: self._parse_no_property(), 799 "ON": lambda self: self._parse_on_property(), 800 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 801 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 802 "PARTITION": lambda self: self._parse_partitioned_of(), 803 "PARTITION BY": lambda self: self._parse_partitioned_by(), 804 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 805 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 806 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 807 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 808 "READS": lambda self: self._parse_reads_property(), 809 "REMOTE": lambda self: self._parse_remote_with_connection(), 810 "RETURNS": lambda self: self._parse_returns(), 811 "ROW": lambda self: self._parse_row(), 812 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 813 "SAMPLE": lambda self: self.expression( 814 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 815 ), 816 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 817 "SETTINGS": lambda self: self.expression( 818 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 819 ), 820 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 821 "SORTKEY": lambda self: self._parse_sortkey(), 822 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 823 "STABLE": lambda self: self.expression( 824 exp.StabilityProperty, this=exp.Literal.string("STABLE") 825 ), 826 "STORED": lambda self: self._parse_stored(), 827 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 828 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 829 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 830 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 831 "TO": lambda self: self._parse_to_table(), 832 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 833 "TRANSFORM": lambda self: self.expression( 834 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 835 ), 836 "TTL": lambda self: self._parse_ttl(), 837 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 838 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 839 "VOLATILE": lambda self: self._parse_volatile_property(), 840 "WITH": lambda self: self._parse_with_property(), 841 } 842 843 CONSTRAINT_PARSERS = { 844 "AUTOINCREMENT": 
lambda self: self._parse_auto_increment(), 845 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 846 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 847 "CHARACTER SET": lambda self: self.expression( 848 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 849 ), 850 "CHECK": lambda self: self.expression( 851 exp.CheckColumnConstraint, 852 this=self._parse_wrapped(self._parse_conjunction), 853 enforced=self._match_text_seq("ENFORCED"), 854 ), 855 "COLLATE": lambda self: self.expression( 856 exp.CollateColumnConstraint, this=self._parse_var() 857 ), 858 "COMMENT": lambda self: self.expression( 859 exp.CommentColumnConstraint, this=self._parse_string() 860 ), 861 "COMPRESS": lambda self: self._parse_compress(), 862 "CLUSTERED": lambda self: self.expression( 863 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 864 ), 865 "NONCLUSTERED": lambda self: self.expression( 866 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 867 ), 868 "DEFAULT": lambda self: self.expression( 869 exp.DefaultColumnConstraint, this=self._parse_bitwise() 870 ), 871 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 872 "EPHEMERAL": lambda self: self.expression( 873 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 874 ), 875 "EXCLUDE": lambda self: self.expression( 876 exp.ExcludeColumnConstraint, this=self._parse_index_params() 877 ), 878 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 879 "FORMAT": lambda self: self.expression( 880 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 881 ), 882 "GENERATED": lambda self: self._parse_generated_as_identity(), 883 "IDENTITY": lambda self: self._parse_auto_increment(), 884 "INLINE": lambda self: self._parse_inline(), 885 "LIKE": lambda self: self._parse_create_like(), 886 "NOT": lambda self: self._parse_not_constraint(), 887 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 888 "ON": lambda self: ( 889 self._match(TokenType.UPDATE) 890 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 891 ) 892 or self.expression(exp.OnProperty, this=self._parse_id_var()), 893 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 894 "PERIOD": lambda self: self._parse_period_for_system_time(), 895 "PRIMARY KEY": lambda self: self._parse_primary_key(), 896 "REFERENCES": lambda self: self._parse_references(match=False), 897 "TITLE": lambda self: self.expression( 898 exp.TitleColumnConstraint, this=self._parse_var_or_string() 899 ), 900 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 901 "UNIQUE": lambda self: self._parse_unique(), 902 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 903 "WITH": lambda self: self.expression( 904 exp.Properties, expressions=self._parse_wrapped_properties() 905 ), 906 } 907 908 ALTER_PARSERS = { 909 "ADD": lambda self: self._parse_alter_table_add(), 910 "ALTER": lambda self: self._parse_alter_table_alter(), 911 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 912 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 913 "DROP": lambda self: self._parse_alter_table_drop(), 914 "RENAME": lambda self: self._parse_alter_table_rename(), 915 } 916 917 ALTER_ALTER_PARSERS = { 918 "DISTKEY": lambda self: self._parse_alter_diststyle(), 919 "DISTSTYLE": lambda self: 
self._parse_alter_diststyle(), 920 "SORTKEY": lambda self: self._parse_alter_sortkey(), 921 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 922 } 923 924 SCHEMA_UNNAMED_CONSTRAINTS = { 925 "CHECK", 926 "EXCLUDE", 927 "FOREIGN KEY", 928 "LIKE", 929 "PERIOD", 930 "PRIMARY KEY", 931 "UNIQUE", 932 } 933 934 NO_PAREN_FUNCTION_PARSERS = { 935 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 936 "CASE": lambda self: self._parse_case(), 937 "IF": lambda self: self._parse_if(), 938 "NEXT": lambda self: self._parse_next_value_for(), 939 } 940 941 INVALID_FUNC_NAME_TOKENS = { 942 TokenType.IDENTIFIER, 943 TokenType.STRING, 944 } 945 946 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 947 948 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 949 950 FUNCTION_PARSERS = { 951 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 952 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 953 "DECODE": lambda self: self._parse_decode(), 954 "EXTRACT": lambda self: self._parse_extract(), 955 "JSON_OBJECT": lambda self: self._parse_json_object(), 956 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 957 "JSON_TABLE": lambda self: self._parse_json_table(), 958 "MATCH": lambda self: self._parse_match_against(), 959 "OPENJSON": lambda self: self._parse_open_json(), 960 "POSITION": lambda self: self._parse_position(), 961 "PREDICT": lambda self: self._parse_predict(), 962 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 963 "STRING_AGG": lambda self: self._parse_string_agg(), 964 "SUBSTRING": lambda self: self._parse_substring(), 965 "TRIM": lambda self: self._parse_trim(), 966 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 967 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 968 } 969 970 QUERY_MODIFIER_PARSERS = { 971 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 972 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 973 TokenType.WHERE: lambda self: ("where", self._parse_where()), 974 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 975 TokenType.HAVING: lambda self: ("having", self._parse_having()), 976 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 977 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 978 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 979 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 980 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 981 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 982 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 983 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 984 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 985 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 986 TokenType.CLUSTER_BY: lambda self: ( 987 "cluster", 988 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 989 ), 990 TokenType.DISTRIBUTE_BY: lambda self: ( 991 "distribute", 992 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 993 ), 994 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 995 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 996 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 997 } 998 999 SET_PARSERS = { 1000 "GLOBAL": lambda self: 
self._parse_set_item_assignment("GLOBAL"), 1001 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1002 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1003 "TRANSACTION": lambda self: self._parse_set_transaction(), 1004 } 1005 1006 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1007 1008 TYPE_LITERAL_PARSERS = { 1009 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1010 } 1011 1012 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1013 1014 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1015 1016 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1017 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1018 "ISOLATION": ( 1019 ("LEVEL", "REPEATABLE", "READ"), 1020 ("LEVEL", "READ", "COMMITTED"), 1021 ("LEVEL", "READ", "UNCOMMITTED"), 1022 ("LEVEL", "SERIALIZABLE"), 1023 ), 1024 "READ": ("WRITE", "ONLY"), 1025 } 1026 1027 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1028 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1029 ) 1030 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1031 1032 CREATE_SEQUENCE: OPTIONS_TYPE = { 1033 "SCALE": ("EXTEND", "NOEXTEND"), 1034 "SHARD": ("EXTEND", "NOEXTEND"), 1035 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1036 **dict.fromkeys( 1037 ( 1038 "SESSION", 1039 "GLOBAL", 1040 "KEEP", 1041 "NOKEEP", 1042 "ORDER", 1043 "NOORDER", 1044 "NOCACHE", 1045 "CYCLE", 1046 "NOCYCLE", 1047 "NOMINVALUE", 1048 "NOMAXVALUE", 1049 "NOSCALE", 1050 "NOSHARD", 1051 ), 1052 tuple(), 1053 ), 1054 } 1055 1056 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1057 1058 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1059 1060 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1061 1062 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1063 1064 CLONE_KEYWORDS = {"CLONE", "COPY"} 1065 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1066 1067 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1068 1069 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1070 1071 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1072 1073 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1074 1075 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1076 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1077 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1078 1079 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1080 1081 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1082 1083 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 1084 1085 DISTINCT_TOKENS = {TokenType.DISTINCT} 1086 1087 NULL_TOKENS = {TokenType.NULL} 1088 1089 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1090 1091 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1092 1093 STRICT_CAST = True 1094 1095 PREFIXED_PIVOT_COLUMNS = False 1096 IDENTIFY_PIVOT_STRINGS = False 1097 1098 LOG_DEFAULTS_TO_LN = False 1099 1100 # Whether ADD is present for each column added by ALTER TABLE 1101 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1102 1103 # Whether the table sample clause expects CSV syntax 1104 TABLESAMPLE_CSV = False 1105
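These class-level attributes are the parser's dialect knobs: dialect-specific parsers usually flip a flag in a subclass rather than overriding a method. A hedged sketch (MyParser is a hypothetical dialect parser, not part of sqlglot):

    from sqlglot.parser import Parser

    class MyParser(Parser):
        # in this hypothetical dialect a one-argument LOG(x) means natural log,
        # so the LOG builder produces exp.Ln instead of exp.Log
        LOG_DEFAULTS_TO_LN = True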
"=") for assignments 1107 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1108 1109 # Whether the TRIM function expects the characters to trim as its first argument 1110 TRIM_PATTERN_FIRST = False 1111 1112 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1113 STRING_ALIASES = False 1114 1115 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1116 MODIFIERS_ATTACHED_TO_UNION = True 1117 UNION_MODIFIERS = {"order", "limit", "offset"} 1118 1119 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1120 NO_PAREN_IF_COMMANDS = True 1121 1122 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1123 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1124 1125 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1126 # If this is True and '(' is not found, the keyword will be treated as an identifier 1127 VALUES_FOLLOWED_BY_PAREN = True 1128 1129 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1130 SUPPORTS_IMPLICIT_UNNEST = False 1131 1132 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1133 INTERVAL_SPANS = True 1134 1135 # Whether a PARTITION clause can follow a table reference 1136 SUPPORTS_PARTITION_SELECTION = False 1137 1138 __slots__ = ( 1139 "error_level", 1140 "error_message_context", 1141 "max_errors", 1142 "dialect", 1143 "sql", 1144 "errors", 1145 "_tokens", 1146 "_index", 1147 "_curr", 1148 "_next", 1149 "_prev", 1150 "_prev_comments", 1151 ) 1152 1153 # Autofilled 1154 SHOW_TRIE: t.Dict = {} 1155 SET_TRIE: t.Dict = {} 1156 1157 def __init__( 1158 self, 1159 error_level: t.Optional[ErrorLevel] = None, 1160 error_message_context: int = 100, 1161 max_errors: int = 3, 1162 dialect: DialectType = None, 1163 ): 1164 from sqlglot.dialects import Dialect 1165 1166 self.error_level = error_level or ErrorLevel.IMMEDIATE 1167 self.error_message_context = error_message_context 1168 self.max_errors = max_errors 1169 self.dialect = Dialect.get_or_raise(dialect) 1170 self.reset() 1171 1172 def reset(self): 1173 self.sql = "" 1174 self.errors = [] 1175 self._tokens = [] 1176 self._index = 0 1177 self._curr = None 1178 self._next = None 1179 self._prev = None 1180 self._prev_comments = None 1181 1182 def parse( 1183 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1184 ) -> t.List[t.Optional[exp.Expression]]: 1185 """ 1186 Parses a list of tokens and returns a list of syntax trees, one tree 1187 per parsed SQL statement. 1188 1189 Args: 1190 raw_tokens: The list of tokens. 1191 sql: The original SQL string, used to produce helpful debug messages. 1192 1193 Returns: 1194 The list of the produced syntax trees. 1195 """ 1196 return self._parse( 1197 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1198 ) 1199 1200 def parse_into( 1201 self, 1202 expression_types: exp.IntoType, 1203 raw_tokens: t.List[Token], 1204 sql: t.Optional[str] = None, 1205 ) -> t.List[t.Optional[exp.Expression]]: 1206 """ 1207 Parses a list of tokens into a given Expression type. If a collection of Expression 1208 types is given instead, this method will try to parse the token list into each one 1209 of them, stopping at the first for which the parsing succeeds. 1210 1211 Args: 1212 expression_types: The expression type(s) to try and parse the token list into. 1213 raw_tokens: The list of tokens. 1214 sql: The original SQL string, used to produce helpful debug messages. 
1200 def parse_into( 1201 self, 1202 expression_types: exp.IntoType, 1203 raw_tokens: t.List[Token], 1204 sql: t.Optional[str] = None, 1205 ) -> t.List[t.Optional[exp.Expression]]: 1206 """ 1207 Parses a list of tokens into a given Expression type. If a collection of Expression 1208 types is given instead, this method will try to parse the token list into each one 1209 of them, stopping at the first for which the parsing succeeds. 1210 1211 Args: 1212 expression_types: The expression type(s) to try and parse the token list into. 1213 raw_tokens: The list of tokens. 1214 sql: The original SQL string, used to produce helpful debug messages. 1215 1216 Returns: 1217 The target Expression. 1218 """ 1219 errors = [] 1220 for expression_type in ensure_list(expression_types): 1221 parser = self.EXPRESSION_PARSERS.get(expression_type) 1222 if not parser: 1223 raise TypeError(f"No parser registered for {expression_type}") 1224 1225 try: 1226 return self._parse(parser, raw_tokens, sql) 1227 except ParseError as e: 1228 e.errors[0]["into_expression"] = expression_type 1229 errors.append(e) 1230 1231 raise ParseError( 1232 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1233 errors=merge_errors(errors), 1234 ) from errors[-1] 1235 1236 def _parse( 1237 self, 1238 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1239 raw_tokens: t.List[Token], 1240 sql: t.Optional[str] = None, 1241 ) -> t.List[t.Optional[exp.Expression]]: 1242 self.reset() 1243 self.sql = sql or "" 1244 1245 total = len(raw_tokens) 1246 chunks: t.List[t.List[Token]] = [[]] 1247 1248 for i, token in enumerate(raw_tokens): 1249 if token.token_type == TokenType.SEMICOLON: 1250 if i < total - 1: 1251 chunks.append([]) 1252 else: 1253 chunks[-1].append(token) 1254 1255 expressions = [] 1256 1257 for tokens in chunks: 1258 self._index = -1 1259 self._tokens = tokens 1260 self._advance() 1261 1262 expressions.append(parse_method(self)) 1263 1264 if self._index < len(self._tokens): 1265 self.raise_error("Invalid expression / Unexpected token") 1266 1267 self.check_errors() 1268 1269 return expressions 1270 1271 def check_errors(self) -> None: 1272 """Logs or raises any found errors, depending on the chosen error level setting.""" 1273 if self.error_level == ErrorLevel.WARN: 1274 for error in self.errors: 1275 logger.error(str(error)) 1276 elif self.error_level == ErrorLevel.RAISE and self.errors: 1277 raise ParseError( 1278 concat_messages(self.errors, self.max_errors), 1279 errors=merge_errors(self.errors), 1280 ) 1281 1282 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1283 """ 1284 Appends an error to the list of recorded errors or raises it, depending on the chosen 1285 error level setting. 1286 """ 1287 token = token or self._curr or self._prev or Token.string("") 1288 start = token.start 1289 end = token.end + 1 1290 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1291 highlight = self.sql[start:end] 1292 end_context = self.sql[end : end + self.error_message_context] 1293 1294 error = ParseError.new( 1295 f"{message}. Line {token.line}, Col: {token.col}.\n" 1296 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1297 description=message, 1298 line=token.line, 1299 col=token.col, 1300 start_context=start_context, 1301 highlight=highlight, 1302 end_context=end_context, 1303 ) 1304 1305 if self.error_level == ErrorLevel.IMMEDIATE: 1306 raise error 1307 1308 self.errors.append(error) 1309
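Since parse_into() re-raises a ParseError that merges the failures of every attempted expression type, callers can inspect what went wrong; a hedged usage sketch, reusing the dialect object from the sketch above:

    from sqlglot import exp
    from sqlglot.errors import ParseError

    broken = "x > 1 AND"  # intentionally incomplete
    try:
        dialect.parser().parse_into(exp.Condition, dialect.tokenize(broken), broken)
    except ParseError as e:
        # each entry carries description/line/col plus the attempted into_expression
        print(e.errors)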
1323 """ 1324 instance = exp_class(**kwargs) 1325 instance.add_comments(comments) if comments else self._add_comments(instance) 1326 return self.validate_expression(instance) 1327 1328 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1329 if expression and self._prev_comments: 1330 expression.add_comments(self._prev_comments) 1331 self._prev_comments = None 1332 1333 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1334 """ 1335 Validates an Expression, making sure that all its mandatory arguments are set. 1336 1337 Args: 1338 expression: The expression to validate. 1339 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1340 1341 Returns: 1342 The validated expression. 1343 """ 1344 if self.error_level != ErrorLevel.IGNORE: 1345 for error_message in expression.error_messages(args): 1346 self.raise_error(error_message) 1347 1348 return expression 1349 1350 def _find_sql(self, start: Token, end: Token) -> str: 1351 return self.sql[start.start : end.end + 1] 1352 1353 def _is_connected(self) -> bool: 1354 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1355 1356 def _advance(self, times: int = 1) -> None: 1357 self._index += times 1358 self._curr = seq_get(self._tokens, self._index) 1359 self._next = seq_get(self._tokens, self._index + 1) 1360 1361 if self._index > 0: 1362 self._prev = self._tokens[self._index - 1] 1363 self._prev_comments = self._prev.comments 1364 else: 1365 self._prev = None 1366 self._prev_comments = None 1367 1368 def _retreat(self, index: int) -> None: 1369 if index != self._index: 1370 self._advance(index - self._index) 1371 1372 def _warn_unsupported(self) -> None: 1373 if len(self._tokens) <= 1: 1374 return 1375 1376 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1377 # interested in emitting a warning for the one being currently processed. 1378 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1379 1380 logger.warning( 1381 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1382 ) 1383 1384 def _parse_command(self) -> exp.Command: 1385 self._warn_unsupported() 1386 return self.expression( 1387 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1388 ) 1389 1390 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1391 """ 1392 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
1390 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1391 """ 1392 Attempts to backtrack if a parse function that contains a try/catch internally raises an error. This behavior can 1393 be different depending on the user-set ErrorLevel, so _try_parse aims to solve this by setting & resetting 1394 the parser state accordingly. 1395 """ 1396 index = self._index 1397 error_level = self.error_level 1398 1399 self.error_level = ErrorLevel.IMMEDIATE 1400 try: 1401 this = parse_method() 1402 except ParseError: 1403 this = None 1404 finally: 1405 if not this or retreat: 1406 self._retreat(index) 1407 self.error_level = error_level 1408 1409 return this 1410 1411 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1412 start = self._prev 1413 exists = self._parse_exists() if allow_exists else None 1414 1415 self._match(TokenType.ON) 1416 1417 materialized = self._match_text_seq("MATERIALIZED") 1418 kind = self._match_set(self.CREATABLES) and self._prev 1419 if not kind: 1420 return self._parse_as_command(start) 1421 1422 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1423 this = self._parse_user_defined_function(kind=kind.token_type) 1424 elif kind.token_type == TokenType.TABLE: 1425 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1426 elif kind.token_type == TokenType.COLUMN: 1427 this = self._parse_column() 1428 else: 1429 this = self._parse_id_var() 1430 1431 self._match(TokenType.IS) 1432 1433 return self.expression( 1434 exp.Comment, 1435 this=this, 1436 kind=kind.text, 1437 expression=self._parse_string(), 1438 exists=exists, 1439 materialized=materialized, 1440 ) 1441 1442 def _parse_to_table( 1443 self, 1444 ) -> exp.ToTableProperty: 1445 table = self._parse_table_parts(schema=True) 1446 return self.expression(exp.ToTableProperty, this=table) 1447 1448 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1449 def _parse_ttl(self) -> exp.Expression: 1450 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1451 this = self._parse_bitwise() 1452 1453 if self._match_text_seq("DELETE"): 1454 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1455 if self._match_text_seq("RECOMPRESS"): 1456 return self.expression( 1457 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1458 ) 1459 if self._match_text_seq("TO", "DISK"): 1460 return self.expression( 1461 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1462 ) 1463 if self._match_text_seq("TO", "VOLUME"): 1464 return self.expression( 1465 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1466 ) 1467 1468 return this 1469 1470 expressions = self._parse_csv(_parse_ttl_action) 1471 where = self._parse_where() 1472 group = self._parse_group() 1473 1474 aggregates = None 1475 if group and self._match(TokenType.SET): 1476 aggregates = self._parse_csv(self._parse_set_item) 1477 1478 return self.expression( 1479 exp.MergeTreeTTL, 1480 expressions=expressions, 1481 where=where, 1482 group=group, 1483 aggregates=aggregates, 1484 ) 1485 1486 def _parse_statement(self) -> t.Optional[exp.Expression]: 1487 if self._curr is None: 1488 return None 1489 1490 if self._match_set(self.STATEMENT_PARSERS): 1491 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1492 1493 if self._match_set(self.dialect.tokenizer.COMMANDS): 1494 return self._parse_command() 1495 1496 expression = self._parse_expression() 1497 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1498 return self._parse_query_modifiers(expression) 1499 1500 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1501 start =
self._prev 1502 temporary = self._match(TokenType.TEMPORARY) 1503 materialized = self._match_text_seq("MATERIALIZED") 1504 1505 kind = self._match_set(self.CREATABLES) and self._prev.text 1506 if not kind: 1507 return self._parse_as_command(start) 1508 1509 if_exists = exists or self._parse_exists() 1510 table = self._parse_table_parts( 1511 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1512 ) 1513 1514 if self._match(TokenType.L_PAREN, advance=False): 1515 expressions = self._parse_wrapped_csv(self._parse_types) 1516 else: 1517 expressions = None 1518 1519 return self.expression( 1520 exp.Drop, 1521 comments=start.comments, 1522 exists=if_exists, 1523 this=table, 1524 expressions=expressions, 1525 kind=kind.upper(), 1526 temporary=temporary, 1527 materialized=materialized, 1528 cascade=self._match_text_seq("CASCADE"), 1529 constraints=self._match_text_seq("CONSTRAINTS"), 1530 purge=self._match_text_seq("PURGE"), 1531 ) 1532 1533 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1534 return ( 1535 self._match_text_seq("IF") 1536 and (not not_ or self._match(TokenType.NOT)) 1537 and self._match(TokenType.EXISTS) 1538 ) 1539 1540 def _parse_create(self) -> exp.Create | exp.Command: 1541 # Note: this can't be None because we've matched a statement parser 1542 start = self._prev 1543 comments = self._prev_comments 1544 1545 replace = ( 1546 start.token_type == TokenType.REPLACE 1547 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1548 or self._match_pair(TokenType.OR, TokenType.ALTER) 1549 ) 1550 1551 unique = self._match(TokenType.UNIQUE) 1552 1553 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1554 self._advance() 1555 1556 properties = None 1557 create_token = self._match_set(self.CREATABLES) and self._prev 1558 1559 if not create_token: 1560 # exp.Properties.Location.POST_CREATE 1561 properties = self._parse_properties() 1562 create_token = self._match_set(self.CREATABLES) and self._prev 1563 1564 if not properties or not create_token: 1565 return self._parse_as_command(start) 1566 1567 exists = self._parse_exists(not_=True) 1568 this = None 1569 expression: t.Optional[exp.Expression] = None 1570 indexes = None 1571 no_schema_binding = None 1572 begin = None 1573 end = None 1574 clone = None 1575 1576 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1577 nonlocal properties 1578 if properties and temp_props: 1579 properties.expressions.extend(temp_props.expressions) 1580 elif temp_props: 1581 properties = temp_props 1582 1583 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1584 this = self._parse_user_defined_function(kind=create_token.token_type) 1585 1586 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1587 extend_props(self._parse_properties()) 1588 1589 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1590 1591 if not expression: 1592 if self._match(TokenType.COMMAND): 1593 expression = self._parse_as_command(self._prev) 1594 else: 1595 begin = self._match(TokenType.BEGIN) 1596 return_ = self._match_text_seq("RETURN") 1597 1598 if self._match(TokenType.STRING, advance=False): 1599 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1600 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1601 expression = self._parse_string() 1602 extend_props(self._parse_properties()) 1603 else: 1604 expression = self._parse_statement() 1605 
1606 end = self._match_text_seq("END") 1607 1608 if return_: 1609 expression = self.expression(exp.Return, this=expression) 1610 elif create_token.token_type == TokenType.INDEX: 1611 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1612 if not self._match(TokenType.ON): 1613 index = self._parse_id_var() 1614 anonymous = False 1615 else: 1616 index = None 1617 anonymous = True 1618 1619 this = self._parse_index(index=index, anonymous=anonymous) 1620 elif create_token.token_type in self.DB_CREATABLES: 1621 table_parts = self._parse_table_parts( 1622 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1623 ) 1624 1625 # exp.Properties.Location.POST_NAME 1626 self._match(TokenType.COMMA) 1627 extend_props(self._parse_properties(before=True)) 1628 1629 this = self._parse_schema(this=table_parts) 1630 1631 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1632 extend_props(self._parse_properties()) 1633 1634 self._match(TokenType.ALIAS) 1635 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1636 # exp.Properties.Location.POST_ALIAS 1637 extend_props(self._parse_properties()) 1638 1639 if create_token.token_type == TokenType.SEQUENCE: 1640 expression = self._parse_types() 1641 extend_props(self._parse_properties()) 1642 else: 1643 expression = self._parse_ddl_select() 1644 1645 if create_token.token_type == TokenType.TABLE: 1646 # exp.Properties.Location.POST_EXPRESSION 1647 extend_props(self._parse_properties()) 1648 1649 indexes = [] 1650 while True: 1651 index = self._parse_index() 1652 1653 # exp.Properties.Location.POST_INDEX 1654 extend_props(self._parse_properties()) 1655 1656 if not index: 1657 break 1658 else: 1659 self._match(TokenType.COMMA) 1660 indexes.append(index) 1661 elif create_token.token_type == TokenType.VIEW: 1662 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1663 no_schema_binding = True 1664 1665 shallow = self._match_text_seq("SHALLOW") 1666 1667 if self._match_texts(self.CLONE_KEYWORDS): 1668 copy = self._prev.text.lower() == "copy" 1669 clone = self.expression( 1670 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1671 ) 1672 1673 if self._curr: 1674 return self._parse_as_command(start) 1675 1676 return self.expression( 1677 exp.Create, 1678 comments=comments, 1679 this=this, 1680 kind=create_token.text.upper(), 1681 replace=replace, 1682 unique=unique, 1683 expression=expression, 1684 exists=exists, 1685 properties=properties, 1686 indexes=indexes, 1687 no_schema_binding=no_schema_binding, 1688 begin=begin, 1689 end=end, 1690 clone=clone, 1691 ) 1692 1693 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1694 seq = exp.SequenceProperties() 1695 1696 options = [] 1697 index = self._index 1698 1699 while self._curr: 1700 self._match(TokenType.COMMA) 1701 if self._match_text_seq("INCREMENT"): 1702 self._match_text_seq("BY") 1703 self._match_text_seq("=") 1704 seq.set("increment", self._parse_term()) 1705 elif self._match_text_seq("MINVALUE"): 1706 seq.set("minvalue", self._parse_term()) 1707 elif self._match_text_seq("MAXVALUE"): 1708 seq.set("maxvalue", self._parse_term()) 1709 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1710 self._match_text_seq("=") 1711 seq.set("start", self._parse_term()) 1712 elif self._match_text_seq("CACHE"): 1713 # T-SQL allows empty CACHE which is initialized dynamically 1714 seq.set("cache", self._parse_number() or True) 1715 elif self._match_text_seq("OWNED", "BY"): 1716 # "OWNED BY NONE" 
is the default 1717 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1718 else: 1719 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1720 if opt: 1721 options.append(opt) 1722 else: 1723 break 1724 1725 seq.set("options", options if options else None) 1726 return None if self._index == index else seq 1727 1728 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1729 # only used for teradata currently 1730 self._match(TokenType.COMMA) 1731 1732 kwargs = { 1733 "no": self._match_text_seq("NO"), 1734 "dual": self._match_text_seq("DUAL"), 1735 "before": self._match_text_seq("BEFORE"), 1736 "default": self._match_text_seq("DEFAULT"), 1737 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1738 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1739 "after": self._match_text_seq("AFTER"), 1740 "minimum": self._match_texts(("MIN", "MINIMUM")), 1741 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1742 } 1743 1744 if self._match_texts(self.PROPERTY_PARSERS): 1745 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1746 try: 1747 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1748 except TypeError: 1749 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1750 1751 return None 1752 1753 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1754 return self._parse_wrapped_csv(self._parse_property) 1755 1756 def _parse_property(self) -> t.Optional[exp.Expression]: 1757 if self._match_texts(self.PROPERTY_PARSERS): 1758 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1759 1760 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1761 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1762 1763 if self._match_text_seq("COMPOUND", "SORTKEY"): 1764 return self._parse_sortkey(compound=True) 1765 1766 if self._match_text_seq("SQL", "SECURITY"): 1767 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1768 1769 index = self._index 1770 key = self._parse_column() 1771 1772 if not self._match(TokenType.EQ): 1773 self._retreat(index) 1774 return self._parse_sequence_properties() 1775 1776 return self.expression( 1777 exp.Property, 1778 this=key.to_dot() if isinstance(key, exp.Column) else key, 1779 value=self._parse_bitwise() or self._parse_var(any_token=True), 1780 ) 1781 1782 def _parse_stored(self) -> exp.FileFormatProperty: 1783 self._match(TokenType.ALIAS) 1784 1785 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1786 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1787 1788 return self.expression( 1789 exp.FileFormatProperty, 1790 this=( 1791 self.expression( 1792 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1793 ) 1794 if input_format or output_format 1795 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1796 ), 1797 ) 1798 1799 def _parse_unquoted_field(self): 1800 field = self._parse_field() 1801 if isinstance(field, exp.Identifier) and not field.quoted: 1802 field = exp.var(field) 1803 1804 return field 1805 1806 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1807 self._match(TokenType.EQ) 1808 self._match(TokenType.ALIAS) 1809 1810 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1811 1812 def _parse_properties(self, before: t.Optional[bool] = None) -> 
t.Optional[exp.Properties]: 1813 properties = [] 1814 while True: 1815 if before: 1816 prop = self._parse_property_before() 1817 else: 1818 prop = self._parse_property() 1819 if not prop: 1820 break 1821 for p in ensure_list(prop): 1822 properties.append(p) 1823 1824 if properties: 1825 return self.expression(exp.Properties, expressions=properties) 1826 1827 return None 1828 1829 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1830 return self.expression( 1831 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1832 ) 1833 1834 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1835 if self._index >= 2: 1836 pre_volatile_token = self._tokens[self._index - 2] 1837 else: 1838 pre_volatile_token = None 1839 1840 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1841 return exp.VolatileProperty() 1842 1843 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1844 1845 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1846 self._match_pair(TokenType.EQ, TokenType.ON) 1847 1848 prop = self.expression(exp.WithSystemVersioningProperty) 1849 if self._match(TokenType.L_PAREN): 1850 self._match_text_seq("HISTORY_TABLE", "=") 1851 prop.set("this", self._parse_table_parts()) 1852 1853 if self._match(TokenType.COMMA): 1854 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1855 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1856 1857 self._match_r_paren() 1858 1859 return prop 1860 1861 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1862 if self._match(TokenType.L_PAREN, advance=False): 1863 return self._parse_wrapped_properties() 1864 1865 if self._match_text_seq("JOURNAL"): 1866 return self._parse_withjournaltable() 1867 1868 if self._match_texts(self.VIEW_ATTRIBUTES): 1869 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1870 1871 if self._match_text_seq("DATA"): 1872 return self._parse_withdata(no=False) 1873 elif self._match_text_seq("NO", "DATA"): 1874 return self._parse_withdata(no=True) 1875 1876 if not self._next: 1877 return None 1878 1879 return self._parse_withisolatedloading() 1880 1881 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1882 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1883 self._match(TokenType.EQ) 1884 1885 user = self._parse_id_var() 1886 self._match(TokenType.PARAMETER) 1887 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1888 1889 if not user or not host: 1890 return None 1891 1892 return exp.DefinerProperty(this=f"{user}@{host}") 1893 1894 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1895 self._match(TokenType.TABLE) 1896 self._match(TokenType.EQ) 1897 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1898 1899 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1900 return self.expression(exp.LogProperty, no=no) 1901 1902 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1903 return self.expression(exp.JournalProperty, **kwargs) 1904 1905 def _parse_checksum(self) -> exp.ChecksumProperty: 1906 self._match(TokenType.EQ) 1907 1908 on = None 1909 if self._match(TokenType.ON): 1910 on = True 1911 elif self._match_text_seq("OFF"): 1912 on = False 1913 1914 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1915 1916 def _parse_cluster(self, 
wrapped: bool = False) -> exp.Cluster: 1917 return self.expression( 1918 exp.Cluster, 1919 expressions=( 1920 self._parse_wrapped_csv(self._parse_ordered) 1921 if wrapped 1922 else self._parse_csv(self._parse_ordered) 1923 ), 1924 ) 1925 1926 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1927 self._match_text_seq("BY") 1928 1929 self._match_l_paren() 1930 expressions = self._parse_csv(self._parse_column) 1931 self._match_r_paren() 1932 1933 if self._match_text_seq("SORTED", "BY"): 1934 self._match_l_paren() 1935 sorted_by = self._parse_csv(self._parse_ordered) 1936 self._match_r_paren() 1937 else: 1938 sorted_by = None 1939 1940 self._match(TokenType.INTO) 1941 buckets = self._parse_number() 1942 self._match_text_seq("BUCKETS") 1943 1944 return self.expression( 1945 exp.ClusteredByProperty, 1946 expressions=expressions, 1947 sorted_by=sorted_by, 1948 buckets=buckets, 1949 ) 1950 1951 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1952 if not self._match_text_seq("GRANTS"): 1953 self._retreat(self._index - 1) 1954 return None 1955 1956 return self.expression(exp.CopyGrantsProperty) 1957 1958 def _parse_freespace(self) -> exp.FreespaceProperty: 1959 self._match(TokenType.EQ) 1960 return self.expression( 1961 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1962 ) 1963 1964 def _parse_mergeblockratio( 1965 self, no: bool = False, default: bool = False 1966 ) -> exp.MergeBlockRatioProperty: 1967 if self._match(TokenType.EQ): 1968 return self.expression( 1969 exp.MergeBlockRatioProperty, 1970 this=self._parse_number(), 1971 percent=self._match(TokenType.PERCENT), 1972 ) 1973 1974 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1975 1976 def _parse_datablocksize( 1977 self, 1978 default: t.Optional[bool] = None, 1979 minimum: t.Optional[bool] = None, 1980 maximum: t.Optional[bool] = None, 1981 ) -> exp.DataBlocksizeProperty: 1982 self._match(TokenType.EQ) 1983 size = self._parse_number() 1984 1985 units = None 1986 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1987 units = self._prev.text 1988 1989 return self.expression( 1990 exp.DataBlocksizeProperty, 1991 size=size, 1992 units=units, 1993 default=default, 1994 minimum=minimum, 1995 maximum=maximum, 1996 ) 1997 1998 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1999 self._match(TokenType.EQ) 2000 always = self._match_text_seq("ALWAYS") 2001 manual = self._match_text_seq("MANUAL") 2002 never = self._match_text_seq("NEVER") 2003 default = self._match_text_seq("DEFAULT") 2004 2005 autotemp = None 2006 if self._match_text_seq("AUTOTEMP"): 2007 autotemp = self._parse_schema() 2008 2009 return self.expression( 2010 exp.BlockCompressionProperty, 2011 always=always, 2012 manual=manual, 2013 never=never, 2014 default=default, 2015 autotemp=autotemp, 2016 ) 2017 2018 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2019 index = self._index 2020 no = self._match_text_seq("NO") 2021 concurrent = self._match_text_seq("CONCURRENT") 2022 2023 if not self._match_text_seq("ISOLATED", "LOADING"): 2024 self._retreat(index) 2025 return None 2026 2027 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2028 return self.expression( 2029 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2030 ) 2031 2032 def _parse_locking(self) -> exp.LockingProperty: 2033 if self._match(TokenType.TABLE): 2034 kind = "TABLE" 2035 elif 
self._match(TokenType.VIEW): 2036 kind = "VIEW" 2037 elif self._match(TokenType.ROW): 2038 kind = "ROW" 2039 elif self._match_text_seq("DATABASE"): 2040 kind = "DATABASE" 2041 else: 2042 kind = None 2043 2044 if kind in ("DATABASE", "TABLE", "VIEW"): 2045 this = self._parse_table_parts() 2046 else: 2047 this = None 2048 2049 if self._match(TokenType.FOR): 2050 for_or_in = "FOR" 2051 elif self._match(TokenType.IN): 2052 for_or_in = "IN" 2053 else: 2054 for_or_in = None 2055 2056 if self._match_text_seq("ACCESS"): 2057 lock_type = "ACCESS" 2058 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2059 lock_type = "EXCLUSIVE" 2060 elif self._match_text_seq("SHARE"): 2061 lock_type = "SHARE" 2062 elif self._match_text_seq("READ"): 2063 lock_type = "READ" 2064 elif self._match_text_seq("WRITE"): 2065 lock_type = "WRITE" 2066 elif self._match_text_seq("CHECKSUM"): 2067 lock_type = "CHECKSUM" 2068 else: 2069 lock_type = None 2070 2071 override = self._match_text_seq("OVERRIDE") 2072 2073 return self.expression( 2074 exp.LockingProperty, 2075 this=this, 2076 kind=kind, 2077 for_or_in=for_or_in, 2078 lock_type=lock_type, 2079 override=override, 2080 ) 2081 2082 def _parse_partition_by(self) -> t.List[exp.Expression]: 2083 if self._match(TokenType.PARTITION_BY): 2084 return self._parse_csv(self._parse_conjunction) 2085 return [] 2086 2087 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2088 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2089 if self._match_text_seq("MINVALUE"): 2090 return exp.var("MINVALUE") 2091 if self._match_text_seq("MAXVALUE"): 2092 return exp.var("MAXVALUE") 2093 return self._parse_bitwise() 2094 2095 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2096 expression = None 2097 from_expressions = None 2098 to_expressions = None 2099 2100 if self._match(TokenType.IN): 2101 this = self._parse_wrapped_csv(self._parse_bitwise) 2102 elif self._match(TokenType.FROM): 2103 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2104 self._match_text_seq("TO") 2105 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2106 elif self._match_text_seq("WITH", "(", "MODULUS"): 2107 this = self._parse_number() 2108 self._match_text_seq(",", "REMAINDER") 2109 expression = self._parse_number() 2110 self._match_r_paren() 2111 else: 2112 self.raise_error("Failed to parse partition bound spec.") 2113 2114 return self.expression( 2115 exp.PartitionBoundSpec, 2116 this=this, 2117 expression=expression, 2118 from_expressions=from_expressions, 2119 to_expressions=to_expressions, 2120 ) 2121 2122 # https://www.postgresql.org/docs/current/sql-createtable.html 2123 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2124 if not self._match_text_seq("OF"): 2125 self._retreat(self._index - 1) 2126 return None 2127 2128 this = self._parse_table(schema=True) 2129 2130 if self._match(TokenType.DEFAULT): 2131 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2132 elif self._match_text_seq("FOR", "VALUES"): 2133 expression = self._parse_partition_bound_spec() 2134 else: 2135 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2136 2137 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2138 2139 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2140 self._match(TokenType.EQ) 2141 return self.expression( 2142 exp.PartitionedByProperty, 2143 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2144 ) 2145 2146 def 
_parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2147 if self._match_text_seq("AND", "STATISTICS"): 2148 statistics = True 2149 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2150 statistics = False 2151 else: 2152 statistics = None 2153 2154 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2155 2156 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2157 if self._match_text_seq("SQL"): 2158 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2159 return None 2160 2161 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2162 if self._match_text_seq("SQL", "DATA"): 2163 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2164 return None 2165 2166 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2167 if self._match_text_seq("PRIMARY", "INDEX"): 2168 return exp.NoPrimaryIndexProperty() 2169 if self._match_text_seq("SQL"): 2170 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2171 return None 2172 2173 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2174 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2175 return exp.OnCommitProperty() 2176 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2177 return exp.OnCommitProperty(delete=True) 2178 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2179 2180 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2181 if self._match_text_seq("SQL", "DATA"): 2182 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2183 return None 2184 2185 def _parse_distkey(self) -> exp.DistKeyProperty: 2186 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2187 2188 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2189 table = self._parse_table(schema=True) 2190 2191 options = [] 2192 while self._match_texts(("INCLUDING", "EXCLUDING")): 2193 this = self._prev.text.upper() 2194 2195 id_var = self._parse_id_var() 2196 if not id_var: 2197 return None 2198 2199 options.append( 2200 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2201 ) 2202 2203 return self.expression(exp.LikeProperty, this=table, expressions=options) 2204 2205 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2206 return self.expression( 2207 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2208 ) 2209 2210 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2211 self._match(TokenType.EQ) 2212 return self.expression( 2213 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2214 ) 2215 2216 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2217 self._match_text_seq("WITH", "CONNECTION") 2218 return self.expression( 2219 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2220 ) 2221 2222 def _parse_returns(self) -> exp.ReturnsProperty: 2223 value: t.Optional[exp.Expression] 2224 is_table = self._match(TokenType.TABLE) 2225 2226 if is_table: 2227 if self._match(TokenType.LT): 2228 value = self.expression( 2229 exp.Schema, 2230 this="TABLE", 2231 expressions=self._parse_csv(self._parse_struct_types), 2232 ) 2233 if not self._match(TokenType.GT): 2234 self.raise_error("Expecting >") 2235 else: 2236 value = self._parse_schema(exp.var("TABLE")) 2237 else: 2238 value = self._parse_types() 2239 2240 return 
self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 2241 2242 def _parse_describe(self) -> exp.Describe: 2243 kind = self._match_set(self.CREATABLES) and self._prev.text 2244 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2245 if self._match(TokenType.DOT): 2246 style = None 2247 self._retreat(self._index - 2) 2248 this = self._parse_table(schema=True) 2249 properties = self._parse_properties() 2250 expressions = properties.expressions if properties else None 2251 return self.expression( 2252 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2253 ) 2254 2255 def _parse_insert(self) -> exp.Insert: 2256 comments = ensure_list(self._prev_comments) 2257 hint = self._parse_hint() 2258 overwrite = self._match(TokenType.OVERWRITE) 2259 ignore = self._match(TokenType.IGNORE) 2260 local = self._match_text_seq("LOCAL") 2261 alternative = None 2262 is_function = None 2263 2264 if self._match_text_seq("DIRECTORY"): 2265 this: t.Optional[exp.Expression] = self.expression( 2266 exp.Directory, 2267 this=self._parse_var_or_string(), 2268 local=local, 2269 row_format=self._parse_row_format(match_row=True), 2270 ) 2271 else: 2272 if self._match(TokenType.OR): 2273 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2274 2275 self._match(TokenType.INTO) 2276 comments += ensure_list(self._prev_comments) 2277 self._match(TokenType.TABLE) 2278 is_function = self._match(TokenType.FUNCTION) 2279 2280 this = ( 2281 self._parse_table(schema=True, parse_partition=True) 2282 if not is_function 2283 else self._parse_function() 2284 ) 2285 2286 returning = self._parse_returning() 2287 2288 return self.expression( 2289 exp.Insert, 2290 comments=comments, 2291 hint=hint, 2292 is_function=is_function, 2293 this=this, 2294 stored=self._match_text_seq("STORED") and self._parse_stored(), 2295 by_name=self._match_text_seq("BY", "NAME"), 2296 exists=self._parse_exists(), 2297 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2298 and self._parse_conjunction(), 2299 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2300 conflict=self._parse_on_conflict(), 2301 returning=returning or self._parse_returning(), 2302 overwrite=overwrite, 2303 alternative=alternative, 2304 ignore=ignore, 2305 ) 2306 2307 def _parse_kill(self) -> exp.Kill: 2308 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2309 2310 return self.expression( 2311 exp.Kill, 2312 this=self._parse_primary(), 2313 kind=kind, 2314 ) 2315 2316 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2317 conflict = self._match_text_seq("ON", "CONFLICT") 2318 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2319 2320 if not conflict and not duplicate: 2321 return None 2322 2323 conflict_keys = None 2324 constraint = None 2325 2326 if conflict: 2327 if self._match_text_seq("ON", "CONSTRAINT"): 2328 constraint = self._parse_id_var() 2329 elif self._match(TokenType.L_PAREN): 2330 conflict_keys = self._parse_csv(self._parse_id_var) 2331 self._match_r_paren() 2332 2333 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2334 if self._prev.token_type == TokenType.UPDATE: 2335 self._match(TokenType.SET) 2336 expressions = self._parse_csv(self._parse_equality) 2337 else: 2338 expressions = None 2339 2340 return self.expression( 2341 exp.OnConflict, 2342 duplicate=duplicate, 2343 expressions=expressions, 2344 action=action, 2345 conflict_keys=conflict_keys, 2346 
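# Illustrative note (not part of the module), a minimal sketch assuming the public
# sqlglot API: the Insert built by _parse_insert stores the node assembled here
# under its "conflict" arg, so a Postgres upsert can be inspected like this:
#
#   >>> import sqlglot
#   >>> ins = sqlglot.parse_one(
#   ...     "INSERT INTO t (a) VALUES (1) ON CONFLICT (a) DO NOTHING", read="postgres"
#   ... )
#   >>> ins.args["conflict"]  # an exp.OnConflict whose conflict_keys hold [a]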
constraint=constraint, 2347 ) 2348 2349 def _parse_returning(self) -> t.Optional[exp.Returning]: 2350 if not self._match(TokenType.RETURNING): 2351 return None 2352 return self.expression( 2353 exp.Returning, 2354 expressions=self._parse_csv(self._parse_expression), 2355 into=self._match(TokenType.INTO) and self._parse_table_part(), 2356 ) 2357 2358 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2359 if not self._match(TokenType.FORMAT): 2360 return None 2361 return self._parse_row_format() 2362 2363 def _parse_row_format( 2364 self, match_row: bool = False 2365 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2366 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2367 return None 2368 2369 if self._match_text_seq("SERDE"): 2370 this = self._parse_string() 2371 2372 serde_properties = None 2373 if self._match(TokenType.SERDE_PROPERTIES): 2374 serde_properties = self.expression( 2375 exp.SerdeProperties, expressions=self._parse_wrapped_properties() 2376 ) 2377 2378 return self.expression( 2379 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2380 ) 2381 2382 self._match_text_seq("DELIMITED") 2383 2384 kwargs = {} 2385 2386 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2387 kwargs["fields"] = self._parse_string() 2388 if self._match_text_seq("ESCAPED", "BY"): 2389 kwargs["escaped"] = self._parse_string() 2390 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2391 kwargs["collection_items"] = self._parse_string() 2392 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2393 kwargs["map_keys"] = self._parse_string() 2394 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2395 kwargs["lines"] = self._parse_string() 2396 if self._match_text_seq("NULL", "DEFINED", "AS"): 2397 kwargs["null"] = self._parse_string() 2398 2399 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2400 2401 def _parse_load(self) -> exp.LoadData | exp.Command: 2402 if self._match_text_seq("DATA"): 2403 local = self._match_text_seq("LOCAL") 2404 self._match_text_seq("INPATH") 2405 inpath = self._parse_string() 2406 overwrite = self._match(TokenType.OVERWRITE) 2407 self._match_pair(TokenType.INTO, TokenType.TABLE) 2408 2409 return self.expression( 2410 exp.LoadData, 2411 this=self._parse_table(schema=True), 2412 local=local, 2413 overwrite=overwrite, 2414 inpath=inpath, 2415 partition=self._parse_partition(), 2416 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2417 serde=self._match_text_seq("SERDE") and self._parse_string(), 2418 ) 2419 return self._parse_as_command(self._prev) 2420 2421 def _parse_delete(self) -> exp.Delete: 2422 # This handles MySQL's "Multiple-Table Syntax" 2423 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2424 tables = None 2425 comments = self._prev_comments 2426 if not self._match(TokenType.FROM, advance=False): 2427 tables = self._parse_csv(self._parse_table) or None 2428 2429 returning = self._parse_returning() 2430 2431 return self.expression( 2432 exp.Delete, 2433 comments=comments, 2434 tables=tables, 2435 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2436 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2437 where=self._parse_where(), 2438 returning=returning or self._parse_returning(), 2439 limit=self._parse_limit(), 2440 ) 2441 2442 def _parse_update(self) -> exp.Update: 2443 comments = self._prev_comments 2444 this = 
self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2445 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2446 returning = self._parse_returning() 2447 return self.expression( 2448 exp.Update, 2449 comments=comments, 2450 **{ # type: ignore 2451 "this": this, 2452 "expressions": expressions, 2453 "from": self._parse_from(joins=True), 2454 "where": self._parse_where(), 2455 "returning": returning or self._parse_returning(), 2456 "order": self._parse_order(), 2457 "limit": self._parse_limit(), 2458 }, 2459 ) 2460 2461 def _parse_uncache(self) -> exp.Uncache: 2462 if not self._match(TokenType.TABLE): 2463 self.raise_error("Expecting TABLE after UNCACHE") 2464 2465 return self.expression( 2466 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2467 ) 2468 2469 def _parse_cache(self) -> exp.Cache: 2470 lazy = self._match_text_seq("LAZY") 2471 self._match(TokenType.TABLE) 2472 table = self._parse_table(schema=True) 2473 2474 options = [] 2475 if self._match_text_seq("OPTIONS"): 2476 self._match_l_paren() 2477 k = self._parse_string() 2478 self._match(TokenType.EQ) 2479 v = self._parse_string() 2480 options = [k, v] 2481 self._match_r_paren() 2482 2483 self._match(TokenType.ALIAS) 2484 return self.expression( 2485 exp.Cache, 2486 this=table, 2487 lazy=lazy, 2488 options=options, 2489 expression=self._parse_select(nested=True), 2490 ) 2491 2492 def _parse_partition(self) -> t.Optional[exp.Partition]: 2493 if not self._match(TokenType.PARTITION): 2494 return None 2495 2496 return self.expression( 2497 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2498 ) 2499 2500 def _parse_value(self) -> t.Optional[exp.Tuple]: 2501 if self._match(TokenType.L_PAREN): 2502 expressions = self._parse_csv(self._parse_expression) 2503 self._match_r_paren() 2504 return self.expression(exp.Tuple, expressions=expressions) 2505 2506 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
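# Illustrative sketch (not part of the module), assuming the public API: each bare
# expression below is wrapped in a one-element Tuple, so both row shapes normalize
# to exp.Tuple nodes:
#
#   >>> import sqlglot
#   >>> values = sqlglot.parse_one("SELECT * FROM (VALUES 1, 2) AS t(x)").find(sqlglot.exp.Values)
#   >>> [type(e).__name__ for e in values.expressions]
#   ['Tuple', 'Tuple']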
2507 expression = self._parse_expression() 2508 if expression: 2509 return self.expression(exp.Tuple, expressions=[expression]) 2510 return None 2511 2512 def _parse_projections(self) -> t.List[exp.Expression]: 2513 return self._parse_expressions() 2514 2515 def _parse_select( 2516 self, 2517 nested: bool = False, 2518 table: bool = False, 2519 parse_subquery_alias: bool = True, 2520 parse_set_operation: bool = True, 2521 ) -> t.Optional[exp.Expression]: 2522 cte = self._parse_with() 2523 2524 if cte: 2525 this = self._parse_statement() 2526 2527 if not this: 2528 self.raise_error("Failed to parse any statement following CTE") 2529 return cte 2530 2531 if "with" in this.arg_types: 2532 this.set("with", cte) 2533 else: 2534 self.raise_error(f"{this.key} does not support CTE") 2535 this = cte 2536 2537 return this 2538 2539 # duckdb supports leading with FROM x 2540 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2541 2542 if self._match(TokenType.SELECT): 2543 comments = self._prev_comments 2544 2545 hint = self._parse_hint() 2546 all_ = self._match(TokenType.ALL) 2547 distinct = self._match_set(self.DISTINCT_TOKENS) 2548 2549 kind = ( 2550 self._match(TokenType.ALIAS) 2551 and self._match_texts(("STRUCT", "VALUE")) 2552 and self._prev.text.upper() 2553 ) 2554 2555 if distinct: 2556 distinct = self.expression( 2557 exp.Distinct, 2558 on=self._parse_value() if self._match(TokenType.ON) else None, 2559 ) 2560 2561 if all_ and distinct: 2562 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2563 2564 limit = self._parse_limit(top=True) 2565 projections = self._parse_projections() 2566 2567 this = self.expression( 2568 exp.Select, 2569 kind=kind, 2570 hint=hint, 2571 distinct=distinct, 2572 expressions=projections, 2573 limit=limit, 2574 ) 2575 this.comments = comments 2576 2577 into = self._parse_into() 2578 if into: 2579 this.set("into", into) 2580 2581 if not from_: 2582 from_ = self._parse_from() 2583 2584 if from_: 2585 this.set("from", from_) 2586 2587 this = self._parse_query_modifiers(this) 2588 elif (table or nested) and self._match(TokenType.L_PAREN): 2589 if self._match(TokenType.PIVOT): 2590 this = self._parse_simplified_pivot() 2591 elif self._match(TokenType.FROM): 2592 this = exp.select("*").from_( 2593 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2594 ) 2595 else: 2596 this = ( 2597 self._parse_table() 2598 if table 2599 else self._parse_select(nested=True, parse_set_operation=False) 2600 ) 2601 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2602 2603 self._match_r_paren() 2604 2605 # We return early here so that the UNION isn't attached to the subquery by the 2606 # following call to _parse_set_operations, but instead becomes the parent node 2607 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2608 elif self._match(TokenType.VALUES, advance=False): 2609 this = self._parse_derived_table_values() 2610 elif from_: 2611 this = exp.select("*").from_(from_.this, copy=False) 2612 else: 2613 this = None 2614 2615 if parse_set_operation: 2616 return self._parse_set_operations(this) 2617 return this 2618 2619 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2620 if not skip_with_token and not self._match(TokenType.WITH): 2621 return None 2622 2623 comments = self._prev_comments 2624 recursive = self._match(TokenType.RECURSIVE) 2625 2626 expressions = [] 2627 while True: 2628 expressions.append(self._parse_cte()) 2629 2630 if not 
self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2631 break 2632 else: 2633 self._match(TokenType.WITH) 2634 2635 return self.expression( 2636 exp.With, comments=comments, expressions=expressions, recursive=recursive 2637 ) 2638 2639 def _parse_cte(self) -> exp.CTE: 2640 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2641 if not alias or not alias.this: 2642 self.raise_error("Expected CTE to have alias") 2643 2644 self._match(TokenType.ALIAS) 2645 2646 if self._match_text_seq("NOT", "MATERIALIZED"): 2647 materialized = False 2648 elif self._match_text_seq("MATERIALIZED"): 2649 materialized = True 2650 else: 2651 materialized = None 2652 2653 return self.expression( 2654 exp.CTE, 2655 this=self._parse_wrapped(self._parse_statement), 2656 alias=alias, 2657 materialized=materialized, 2658 ) 2659 2660 def _parse_table_alias( 2661 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2662 ) -> t.Optional[exp.TableAlias]: 2663 any_token = self._match(TokenType.ALIAS) 2664 alias = ( 2665 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2666 or self._parse_string_as_identifier() 2667 ) 2668 2669 index = self._index 2670 if self._match(TokenType.L_PAREN): 2671 columns = self._parse_csv(self._parse_function_parameter) 2672 self._match_r_paren() if columns else self._retreat(index) 2673 else: 2674 columns = None 2675 2676 if not alias and not columns: 2677 return None 2678 2679 return self.expression(exp.TableAlias, this=alias, columns=columns) 2680 2681 def _parse_subquery( 2682 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2683 ) -> t.Optional[exp.Subquery]: 2684 if not this: 2685 return None 2686 2687 return self.expression( 2688 exp.Subquery, 2689 this=this, 2690 pivots=self._parse_pivots(), 2691 alias=self._parse_table_alias() if parse_alias else None, 2692 ) 2693 2694 def _implicit_unnests_to_explicit(self, this: E) -> E: 2695 from sqlglot.optimizer.normalize_identifiers import ( 2696 normalize_identifiers as _norm, 2697 ) 2698 2699 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2700 for i, join in enumerate(this.args.get("joins") or []): 2701 table = join.this 2702 normalized_table = table.copy() 2703 normalized_table.meta["maybe_column"] = True 2704 normalized_table = _norm(normalized_table, dialect=self.dialect) 2705 2706 if isinstance(table, exp.Table) and not join.args.get("on"): 2707 if normalized_table.parts[0].name in refs: 2708 table_as_column = table.to_column() 2709 unnest = exp.Unnest(expressions=[table_as_column]) 2710 2711 # Table.to_column creates a parent Alias node that we want to convert to 2712 # a TableAlias and attach to the Unnest, so it matches the parser's output 2713 if isinstance(table.args.get("alias"), exp.TableAlias): 2714 table_as_column.replace(table_as_column.this) 2715 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2716 2717 table.replace(unnest) 2718 2719 refs.add(normalized_table.alias_or_name) 2720 2721 return this 2722 2723 def _parse_query_modifiers( 2724 self, this: t.Optional[exp.Expression] 2725 ) -> t.Optional[exp.Expression]: 2726 if isinstance(this, (exp.Query, exp.Table)): 2727 for join in self._parse_joins(): 2728 this.append("joins", join) 2729 for lateral in iter(self._parse_lateral, None): 2730 this.append("laterals", lateral) 2731 2732 while True: 2733 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2734 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2735 key, expression 
= parser(self) 2736 2737 if expression: 2738 this.set(key, expression) 2739 if key == "limit": 2740 offset = expression.args.pop("offset", None) 2741 2742 if offset: 2743 offset = exp.Offset(expression=offset) 2744 this.set("offset", offset) 2745 2746 limit_by_expressions = expression.expressions 2747 expression.set("expressions", None) 2748 offset.set("expressions", limit_by_expressions) 2749 continue 2750 break 2751 2752 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2753 this = self._implicit_unnests_to_explicit(this) 2754 2755 return this 2756 2757 def _parse_hint(self) -> t.Optional[exp.Hint]: 2758 if self._match(TokenType.HINT): 2759 hints = [] 2760 for hint in iter( 2761 lambda: self._parse_csv( 2762 lambda: self._parse_function() or self._parse_var(upper=True) 2763 ), 2764 [], 2765 ): 2766 hints.extend(hint) 2767 2768 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2769 self.raise_error("Expected */ after HINT") 2770 2771 return self.expression(exp.Hint, expressions=hints) 2772 2773 return None 2774 2775 def _parse_into(self) -> t.Optional[exp.Into]: 2776 if not self._match(TokenType.INTO): 2777 return None 2778 2779 temp = self._match(TokenType.TEMPORARY) 2780 unlogged = self._match_text_seq("UNLOGGED") 2781 self._match(TokenType.TABLE) 2782 2783 return self.expression( 2784 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2785 ) 2786 2787 def _parse_from( 2788 self, joins: bool = False, skip_from_token: bool = False 2789 ) -> t.Optional[exp.From]: 2790 if not skip_from_token and not self._match(TokenType.FROM): 2791 return None 2792 2793 return self.expression( 2794 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2795 ) 2796 2797 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2798 return self.expression( 2799 exp.MatchRecognizeMeasure, 2800 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2801 this=self._parse_expression(), 2802 ) 2803 2804 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2805 if not self._match(TokenType.MATCH_RECOGNIZE): 2806 return None 2807 2808 self._match_l_paren() 2809 2810 partition = self._parse_partition_by() 2811 order = self._parse_order() 2812 2813 measures = ( 2814 self._parse_csv(self._parse_match_recognize_measure) 2815 if self._match_text_seq("MEASURES") 2816 else None 2817 ) 2818 2819 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2820 rows = exp.var("ONE ROW PER MATCH") 2821 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2822 text = "ALL ROWS PER MATCH" 2823 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2824 text += " SHOW EMPTY MATCHES" 2825 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2826 text += " OMIT EMPTY MATCHES" 2827 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2828 text += " WITH UNMATCHED ROWS" 2829 rows = exp.var(text) 2830 else: 2831 rows = None 2832 2833 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2834 text = "AFTER MATCH SKIP" 2835 if self._match_text_seq("PAST", "LAST", "ROW"): 2836 text += " PAST LAST ROW" 2837 elif self._match_text_seq("TO", "NEXT", "ROW"): 2838 text += " TO NEXT ROW" 2839 elif self._match_text_seq("TO", "FIRST"): 2840 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2841 elif self._match_text_seq("TO", "LAST"): 2842 text += f" TO LAST {self._advance_any().text}" # type: ignore 2843 after = exp.var(text) 2844 else: 2845 after = None 2846 2847 if self._match_text_seq("PATTERN"): 
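# The PATTERN body is a regex-like mini-language (e.g. PATTERN (A B+ C?)) that is
# not tokenized into an AST. The scan below simply counts parentheses until the
# opening "(" is balanced and stores the raw SQL text in between as an exp.var,
# so the pattern round-trips verbatim when the query is regenerated.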
2848 self._match_l_paren() 2849 2850 if not self._curr: 2851 self.raise_error("Expecting )", self._curr) 2852 2853 paren = 1 2854 start = self._curr 2855 2856 while self._curr and paren > 0: 2857 if self._curr.token_type == TokenType.L_PAREN: 2858 paren += 1 2859 if self._curr.token_type == TokenType.R_PAREN: 2860 paren -= 1 2861 2862 end = self._prev 2863 self._advance() 2864 2865 if paren > 0: 2866 self.raise_error("Expecting )", self._curr) 2867 2868 pattern = exp.var(self._find_sql(start, end)) 2869 else: 2870 pattern = None 2871 2872 define = ( 2873 self._parse_csv(self._parse_name_as_expression) 2874 if self._match_text_seq("DEFINE") 2875 else None 2876 ) 2877 2878 self._match_r_paren() 2879 2880 return self.expression( 2881 exp.MatchRecognize, 2882 partition_by=partition, 2883 order=order, 2884 measures=measures, 2885 rows=rows, 2886 after=after, 2887 pattern=pattern, 2888 define=define, 2889 alias=self._parse_table_alias(), 2890 ) 2891 2892 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2893 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2894 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2895 cross_apply = False 2896 2897 if cross_apply is not None: 2898 this = self._parse_select(table=True) 2899 view = None 2900 outer = None 2901 elif self._match(TokenType.LATERAL): 2902 this = self._parse_select(table=True) 2903 view = self._match(TokenType.VIEW) 2904 outer = self._match(TokenType.OUTER) 2905 else: 2906 return None 2907 2908 if not this: 2909 this = ( 2910 self._parse_unnest() 2911 or self._parse_function() 2912 or self._parse_id_var(any_token=False) 2913 ) 2914 2915 while self._match(TokenType.DOT): 2916 this = exp.Dot( 2917 this=this, 2918 expression=self._parse_function() or self._parse_id_var(any_token=False), 2919 ) 2920 2921 if view: 2922 table = self._parse_id_var(any_token=False) 2923 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2924 table_alias: t.Optional[exp.TableAlias] = self.expression( 2925 exp.TableAlias, this=table, columns=columns 2926 ) 2927 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2928 # We move the alias from the lateral's child node to the lateral itself 2929 table_alias = this.args["alias"].pop() 2930 else: 2931 table_alias = self._parse_table_alias() 2932 2933 return self.expression( 2934 exp.Lateral, 2935 this=this, 2936 view=view, 2937 outer=outer, 2938 alias=table_alias, 2939 cross_apply=cross_apply, 2940 ) 2941 2942 def _parse_join_parts( 2943 self, 2944 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2945 return ( 2946 self._match_set(self.JOIN_METHODS) and self._prev, 2947 self._match_set(self.JOIN_SIDES) and self._prev, 2948 self._match_set(self.JOIN_KINDS) and self._prev, 2949 ) 2950 2951 def _parse_join( 2952 self, skip_join_token: bool = False, parse_bracket: bool = False 2953 ) -> t.Optional[exp.Join]: 2954 if self._match(TokenType.COMMA): 2955 return self.expression(exp.Join, this=self._parse_table()) 2956 2957 index = self._index 2958 method, side, kind = self._parse_join_parts() 2959 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2960 join = self._match(TokenType.JOIN) 2961 2962 if not skip_join_token and not join: 2963 self._retreat(index) 2964 kind = None 2965 method = None 2966 side = None 2967 2968 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2969 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2970 2971 if not skip_join_token 
and not join and not outer_apply and not cross_apply: 2972 return None 2973 2974 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2975 2976 if method: 2977 kwargs["method"] = method.text 2978 if side: 2979 kwargs["side"] = side.text 2980 if kind: 2981 kwargs["kind"] = kind.text 2982 if hint: 2983 kwargs["hint"] = hint 2984 2985 if self._match(TokenType.MATCH_CONDITION): 2986 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 2987 2988 if self._match(TokenType.ON): 2989 kwargs["on"] = self._parse_conjunction() 2990 elif self._match(TokenType.USING): 2991 kwargs["using"] = self._parse_wrapped_id_vars() 2992 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 2993 kind and kind.token_type == TokenType.CROSS 2994 ): 2995 index = self._index 2996 joins: t.Optional[list] = list(self._parse_joins()) 2997 2998 if joins and self._match(TokenType.ON): 2999 kwargs["on"] = self._parse_conjunction() 3000 elif joins and self._match(TokenType.USING): 3001 kwargs["using"] = self._parse_wrapped_id_vars() 3002 else: 3003 joins = None 3004 self._retreat(index) 3005 3006 kwargs["this"].set("joins", joins if joins else None) 3007 3008 comments = [c for token in (method, side, kind) if token for c in token.comments] 3009 return self.expression(exp.Join, comments=comments, **kwargs) 3010 3011 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3012 this = self._parse_conjunction() 3013 3014 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3015 return this 3016 3017 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3018 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3019 3020 return this 3021 3022 def _parse_index_params(self) -> exp.IndexParameters: 3023 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3024 3025 if self._match(TokenType.L_PAREN, advance=False): 3026 columns = self._parse_wrapped_csv(self._parse_with_operator) 3027 else: 3028 columns = None 3029 3030 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3031 partition_by = self._parse_partition_by() 3032 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3033 tablespace = ( 3034 self._parse_var(any_token=True) 3035 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3036 else None 3037 ) 3038 where = self._parse_where() 3039 3040 return self.expression( 3041 exp.IndexParameters, 3042 using=using, 3043 columns=columns, 3044 include=include, 3045 partition_by=partition_by, 3046 where=where, 3047 with_storage=with_storage, 3048 tablespace=tablespace, 3049 ) 3050 3051 def _parse_index( 3052 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3053 ) -> t.Optional[exp.Index]: 3054 if index or anonymous: 3055 unique = None 3056 primary = None 3057 amp = None 3058 3059 self._match(TokenType.ON) 3060 self._match(TokenType.TABLE) # hive 3061 table = self._parse_table_parts(schema=True) 3062 else: 3063 unique = self._match(TokenType.UNIQUE) 3064 primary = self._match_text_seq("PRIMARY") 3065 amp = self._match_text_seq("AMP") 3066 3067 if not self._match(TokenType.INDEX): 3068 return None 3069 3070 index = self._parse_id_var() 3071 table = None 3072 3073 params = self._parse_index_params() 3074 3075 return self.expression( 3076 exp.Index, 3077 this=index, 3078 table=table, 3079 unique=unique, 3080 primary=primary, 3081 amp=amp, 3082 params=params, 3083 ) 3084 3085 def _parse_table_hints(self) -> 
t.Optional[t.List[exp.Expression]]: 3086 hints: t.List[exp.Expression] = [] 3087 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3088 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3089 hints.append( 3090 self.expression( 3091 exp.WithTableHint, 3092 expressions=self._parse_csv( 3093 lambda: self._parse_function() or self._parse_var(any_token=True) 3094 ), 3095 ) 3096 ) 3097 self._match_r_paren() 3098 else: 3099 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3100 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3101 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3102 3103 self._match_texts(("INDEX", "KEY")) 3104 if self._match(TokenType.FOR): 3105 hint.set("target", self._advance_any() and self._prev.text.upper()) 3106 3107 hint.set("expressions", self._parse_wrapped_id_vars()) 3108 hints.append(hint) 3109 3110 return hints or None 3111 3112 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3113 return ( 3114 (not schema and self._parse_function(optional_parens=False)) 3115 or self._parse_id_var(any_token=False) 3116 or self._parse_string_as_identifier() 3117 or self._parse_placeholder() 3118 ) 3119 3120 def _parse_table_parts( 3121 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3122 ) -> exp.Table: 3123 catalog = None 3124 db = None 3125 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3126 3127 while self._match(TokenType.DOT): 3128 if catalog: 3129 # This allows nesting the table in arbitrarily many dot expressions if needed 3130 table = self.expression( 3131 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3132 ) 3133 else: 3134 catalog = db 3135 db = table 3136 # "" used for tsql FROM a..b case 3137 table = self._parse_table_part(schema=schema) or "" 3138 3139 if ( 3140 wildcard 3141 and self._is_connected() 3142 and (isinstance(table, exp.Identifier) or not table) 3143 and self._match(TokenType.STAR) 3144 ): 3145 if isinstance(table, exp.Identifier): 3146 table.args["this"] += "*" 3147 else: 3148 table = exp.Identifier(this="*") 3149 3150 # We bubble up comments from the Identifier to the Table 3151 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3152 3153 if is_db_reference: 3154 catalog = db 3155 db = table 3156 table = None 3157 3158 if not table and not is_db_reference: 3159 self.raise_error(f"Expected table name but got {self._curr}") 3160 if not db and is_db_reference: 3161 self.raise_error(f"Expected database name but got {self._curr}") 3162 3163 return self.expression( 3164 exp.Table, 3165 comments=comments, 3166 this=table, 3167 db=db, 3168 catalog=catalog, 3169 pivots=self._parse_pivots(), 3170 ) 3171 3172 def _parse_table( 3173 self, 3174 schema: bool = False, 3175 joins: bool = False, 3176 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3177 parse_bracket: bool = False, 3178 is_db_reference: bool = False, 3179 parse_partition: bool = False, 3180 ) -> t.Optional[exp.Expression]: 3181 lateral = self._parse_lateral() 3182 if lateral: 3183 return lateral 3184 3185 unnest = self._parse_unnest() 3186 if unnest: 3187 return unnest 3188 3189 values = self._parse_derived_table_values() 3190 if values: 3191 return values 3192 3193 subquery = self._parse_select(table=True) 3194 if subquery: 3195 if not subquery.args.get("pivots"): 3196 subquery.set("pivots", self._parse_pivots()) 3197 return subquery 3198 3199 bracket = parse_bracket and 
self._parse_bracket(None) 3200 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3201 3202 only = self._match(TokenType.ONLY) 3203 3204 this = t.cast( 3205 exp.Expression, 3206 bracket 3207 or self._parse_bracket( 3208 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3209 ), 3210 ) 3211 3212 if only: 3213 this.set("only", only) 3214 3215 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3216 self._match_text_seq("*") 3217 3218 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3219 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3220 this.set("partition", self._parse_partition()) 3221 3222 if schema: 3223 return self._parse_schema(this=this) 3224 3225 version = self._parse_version() 3226 3227 if version: 3228 this.set("version", version) 3229 3230 if self.dialect.ALIAS_POST_TABLESAMPLE: 3231 table_sample = self._parse_table_sample() 3232 3233 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3234 if alias: 3235 this.set("alias", alias) 3236 3237 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3238 return self.expression( 3239 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3240 ) 3241 3242 this.set("hints", self._parse_table_hints()) 3243 3244 if not this.args.get("pivots"): 3245 this.set("pivots", self._parse_pivots()) 3246 3247 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3248 table_sample = self._parse_table_sample() 3249 3250 if table_sample: 3251 table_sample.set("this", this) 3252 this = table_sample 3253 3254 if joins: 3255 for join in self._parse_joins(): 3256 this.append("joins", join) 3257 3258 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3259 this.set("ordinality", True) 3260 this.set("alias", self._parse_table_alias()) 3261 3262 return this 3263 3264 def _parse_version(self) -> t.Optional[exp.Version]: 3265 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3266 this = "TIMESTAMP" 3267 elif self._match(TokenType.VERSION_SNAPSHOT): 3268 this = "VERSION" 3269 else: 3270 return None 3271 3272 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3273 kind = self._prev.text.upper() 3274 start = self._parse_bitwise() 3275 self._match_texts(("TO", "AND")) 3276 end = self._parse_bitwise() 3277 expression: t.Optional[exp.Expression] = self.expression( 3278 exp.Tuple, expressions=[start, end] 3279 ) 3280 elif self._match_text_seq("CONTAINED", "IN"): 3281 kind = "CONTAINED IN" 3282 expression = self.expression( 3283 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3284 ) 3285 elif self._match(TokenType.ALL): 3286 kind = "ALL" 3287 expression = None 3288 else: 3289 self._match_text_seq("AS", "OF") 3290 kind = "AS OF" 3291 expression = self._parse_type() 3292 3293 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3294 3295 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3296 if not self._match(TokenType.UNNEST): 3297 return None 3298 3299 expressions = self._parse_wrapped_csv(self._parse_equality) 3300 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3301 3302 alias = self._parse_table_alias() if with_alias else None 3303 3304 if alias: 3305 if self.dialect.UNNEST_COLUMN_ONLY: 3306 if alias.args.get("columns"): 3307 self.raise_error("Unexpected extra column alias in unnest.") 3308 3309 alias.set("columns", [alias.this]) 3310 alias.set("this", None) 3311 3312 columns = 
alias.args.get("columns") or [] 3313 if offset and len(expressions) < len(columns): 3314 offset = columns.pop() 3315 3316 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3317 self._match(TokenType.ALIAS) 3318 offset = self._parse_id_var( 3319 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3320 ) or exp.to_identifier("offset") 3321 3322 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3323 3324 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3325 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3326 if not is_derived and not self._match_text_seq("VALUES"): 3327 return None 3328 3329 expressions = self._parse_csv(self._parse_value) 3330 alias = self._parse_table_alias() 3331 3332 if is_derived: 3333 self._match_r_paren() 3334 3335 return self.expression( 3336 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3337 ) 3338 3339 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3340 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3341 as_modifier and self._match_text_seq("USING", "SAMPLE") 3342 ): 3343 return None 3344 3345 bucket_numerator = None 3346 bucket_denominator = None 3347 bucket_field = None 3348 percent = None 3349 size = None 3350 seed = None 3351 3352 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3353 matched_l_paren = self._match(TokenType.L_PAREN) 3354 3355 if self.TABLESAMPLE_CSV: 3356 num = None 3357 expressions = self._parse_csv(self._parse_primary) 3358 else: 3359 expressions = None 3360 num = ( 3361 self._parse_factor() 3362 if self._match(TokenType.NUMBER, advance=False) 3363 else self._parse_primary() or self._parse_placeholder() 3364 ) 3365 3366 if self._match_text_seq("BUCKET"): 3367 bucket_numerator = self._parse_number() 3368 self._match_text_seq("OUT", "OF") 3369 bucket_denominator = bucket_denominator = self._parse_number() 3370 self._match(TokenType.ON) 3371 bucket_field = self._parse_field() 3372 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3373 percent = num 3374 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3375 size = num 3376 else: 3377 percent = num 3378 3379 if matched_l_paren: 3380 self._match_r_paren() 3381 3382 if self._match(TokenType.L_PAREN): 3383 method = self._parse_var(upper=True) 3384 seed = self._match(TokenType.COMMA) and self._parse_number() 3385 self._match_r_paren() 3386 elif self._match_texts(("SEED", "REPEATABLE")): 3387 seed = self._parse_wrapped(self._parse_number) 3388 3389 return self.expression( 3390 exp.TableSample, 3391 expressions=expressions, 3392 method=method, 3393 bucket_numerator=bucket_numerator, 3394 bucket_denominator=bucket_denominator, 3395 bucket_field=bucket_field, 3396 percent=percent, 3397 size=size, 3398 seed=seed, 3399 ) 3400 3401 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3402 return list(iter(self._parse_pivot, None)) or None 3403 3404 def _parse_joins(self) -> t.Iterator[exp.Join]: 3405 return iter(self._parse_join, None) 3406 3407 # https://duckdb.org/docs/sql/statements/pivot 3408 def _parse_simplified_pivot(self) -> exp.Pivot: 3409 def _parse_on() -> t.Optional[exp.Expression]: 3410 this = self._parse_bitwise() 3411 return self._parse_in(this) if self._match(TokenType.IN) else this 3412 3413 this = self._parse_table() 3414 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3415 using = self._match(TokenType.USING) and self._parse_csv( 3416 
lambda: self._parse_alias(self._parse_function()) 3417 ) 3418 group = self._parse_group() 3419 return self.expression( 3420 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3421 ) 3422 3423 def _parse_pivot_in(self) -> exp.In: 3424 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3425 this = self._parse_conjunction() 3426 3427 self._match(TokenType.ALIAS) 3428 alias = self._parse_field() 3429 if alias: 3430 return self.expression(exp.PivotAlias, this=this, alias=alias) 3431 3432 return this 3433 3434 value = self._parse_column() 3435 3436 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3437 self.raise_error("Expecting IN (") 3438 3439 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3440 3441 self._match_r_paren() 3442 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3443 3444 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3445 index = self._index 3446 include_nulls = None 3447 3448 if self._match(TokenType.PIVOT): 3449 unpivot = False 3450 elif self._match(TokenType.UNPIVOT): 3451 unpivot = True 3452 3453 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3454 if self._match_text_seq("INCLUDE", "NULLS"): 3455 include_nulls = True 3456 elif self._match_text_seq("EXCLUDE", "NULLS"): 3457 include_nulls = False 3458 else: 3459 return None 3460 3461 expressions = [] 3462 3463 if not self._match(TokenType.L_PAREN): 3464 self._retreat(index) 3465 return None 3466 3467 if unpivot: 3468 expressions = self._parse_csv(self._parse_column) 3469 else: 3470 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3471 3472 if not expressions: 3473 self.raise_error("Failed to parse PIVOT's aggregation list") 3474 3475 if not self._match(TokenType.FOR): 3476 self.raise_error("Expecting FOR") 3477 3478 field = self._parse_pivot_in() 3479 3480 self._match_r_paren() 3481 3482 pivot = self.expression( 3483 exp.Pivot, 3484 expressions=expressions, 3485 field=field, 3486 unpivot=unpivot, 3487 include_nulls=include_nulls, 3488 ) 3489 3490 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3491 pivot.set("alias", self._parse_table_alias()) 3492 3493 if not unpivot: 3494 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3495 3496 columns: t.List[exp.Expression] = [] 3497 for fld in pivot.args["field"].expressions: 3498 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3499 for name in names: 3500 if self.PREFIXED_PIVOT_COLUMNS: 3501 name = f"{name}_{field_name}" if name else field_name 3502 else: 3503 name = f"{field_name}_{name}" if name else field_name 3504 3505 columns.append(exp.to_identifier(name)) 3506 3507 pivot.set("columns", columns) 3508 3509 return pivot 3510 3511 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3512 return [agg.alias for agg in aggregations] 3513 3514 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3515 if not skip_where_token and not self._match(TokenType.PREWHERE): 3516 return None 3517 3518 return self.expression( 3519 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3520 ) 3521 3522 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3523 if not skip_where_token and not self._match(TokenType.WHERE): 3524 return None 3525 3526 return self.expression( 3527 exp.Where, comments=self._prev_comments, 
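# Illustrative sketch (not part of the module), assuming the public API: the node
# built here is attached to the enclosing query under the "where" arg, e.g.
# sqlglot.parse_one("SELECT a FROM t WHERE x > 1").args["where"] is the exp.Where
# wrapping the parsed condition.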
this=self._parse_conjunction() 3528 ) 3529 3530 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3531 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3532 return None 3533 3534 elements: t.Dict[str, t.Any] = defaultdict(list) 3535 3536 if self._match(TokenType.ALL): 3537 elements["all"] = True 3538 elif self._match(TokenType.DISTINCT): 3539 elements["all"] = False 3540 3541 while True: 3542 expressions = self._parse_csv(self._parse_conjunction) 3543 if expressions: 3544 elements["expressions"].extend(expressions) 3545 3546 grouping_sets = self._parse_grouping_sets() 3547 if grouping_sets: 3548 elements["grouping_sets"].extend(grouping_sets) 3549 3550 rollup = None 3551 cube = None 3552 totals = None 3553 3554 index = self._index 3555 with_ = self._match(TokenType.WITH) 3556 if self._match(TokenType.ROLLUP): 3557 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3558 elements["rollup"].extend(ensure_list(rollup)) 3559 3560 if self._match(TokenType.CUBE): 3561 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3562 elements["cube"].extend(ensure_list(cube)) 3563 3564 if self._match_text_seq("TOTALS"): 3565 totals = True 3566 elements["totals"] = True # type: ignore 3567 3568 if not (grouping_sets or rollup or cube or totals): 3569 if with_: 3570 self._retreat(index) 3571 break 3572 3573 return self.expression(exp.Group, **elements) # type: ignore 3574 3575 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3576 if not self._match(TokenType.GROUPING_SETS): 3577 return None 3578 3579 return self._parse_wrapped_csv(self._parse_grouping_set) 3580 3581 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3582 if self._match(TokenType.L_PAREN): 3583 grouping_set = self._parse_csv(self._parse_column) 3584 self._match_r_paren() 3585 return self.expression(exp.Tuple, expressions=grouping_set) 3586 3587 return self._parse_column() 3588 3589 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3590 if not skip_having_token and not self._match(TokenType.HAVING): 3591 return None 3592 return self.expression(exp.Having, this=self._parse_conjunction()) 3593 3594 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3595 if not self._match(TokenType.QUALIFY): 3596 return None 3597 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3598 3599 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3600 if skip_start_token: 3601 start = None 3602 elif self._match(TokenType.START_WITH): 3603 start = self._parse_conjunction() 3604 else: 3605 return None 3606 3607 self._match(TokenType.CONNECT_BY) 3608 nocycle = self._match_text_seq("NOCYCLE") 3609 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3610 exp.Prior, this=self._parse_bitwise() 3611 ) 3612 connect = self._parse_conjunction() 3613 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3614 3615 if not start and self._match(TokenType.START_WITH): 3616 start = self._parse_conjunction() 3617 3618 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3619 3620 def _parse_name_as_expression(self) -> exp.Alias: 3621 return self.expression( 3622 exp.Alias, 3623 alias=self._parse_id_var(any_token=True), 3624 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3625 ) 3626 3627 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3628 if self._match_text_seq("INTERPOLATE"): 3629 return 
self._parse_wrapped_csv(self._parse_name_as_expression) 3630 return None 3631 3632 def _parse_order( 3633 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3634 ) -> t.Optional[exp.Expression]: 3635 siblings = None 3636 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3637 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3638 return this 3639 3640 siblings = True 3641 3642 return self.expression( 3643 exp.Order, 3644 this=this, 3645 expressions=self._parse_csv(self._parse_ordered), 3646 interpolate=self._parse_interpolate(), 3647 siblings=siblings, 3648 ) 3649 3650 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3651 if not self._match(token): 3652 return None 3653 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3654 3655 def _parse_ordered( 3656 self, parse_method: t.Optional[t.Callable] = None 3657 ) -> t.Optional[exp.Ordered]: 3658 this = parse_method() if parse_method else self._parse_conjunction() 3659 if not this: 3660 return None 3661 3662 asc = self._match(TokenType.ASC) 3663 desc = self._match(TokenType.DESC) or (asc and False) 3664 3665 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3666 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3667 3668 nulls_first = is_nulls_first or False 3669 explicitly_null_ordered = is_nulls_first or is_nulls_last 3670 3671 if ( 3672 not explicitly_null_ordered 3673 and ( 3674 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3675 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3676 ) 3677 and self.dialect.NULL_ORDERING != "nulls_are_last" 3678 ): 3679 nulls_first = True 3680 3681 if self._match_text_seq("WITH", "FILL"): 3682 with_fill = self.expression( 3683 exp.WithFill, 3684 **{ # type: ignore 3685 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3686 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3687 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3688 }, 3689 ) 3690 else: 3691 with_fill = None 3692 3693 return self.expression( 3694 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3695 ) 3696 3697 def _parse_limit( 3698 self, 3699 this: t.Optional[exp.Expression] = None, 3700 top: bool = False, 3701 skip_limit_token: bool = False, 3702 ) -> t.Optional[exp.Expression]: 3703 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3704 comments = self._prev_comments 3705 if top: 3706 limit_paren = self._match(TokenType.L_PAREN) 3707 expression = self._parse_term() if limit_paren else self._parse_number() 3708 3709 if limit_paren: 3710 self._match_r_paren() 3711 else: 3712 expression = self._parse_term() 3713 3714 if self._match(TokenType.COMMA): 3715 offset = expression 3716 expression = self._parse_term() 3717 else: 3718 offset = None 3719 3720 limit_exp = self.expression( 3721 exp.Limit, 3722 this=this, 3723 expression=expression, 3724 offset=offset, 3725 comments=comments, 3726 expressions=self._parse_limit_by(), 3727 ) 3728 3729 return limit_exp 3730 3731 if self._match(TokenType.FETCH): 3732 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3733 direction = self._prev.text.upper() if direction else "FIRST" 3734 3735 count = self._parse_field(tokens=self.FETCH_TOKENS) 3736 percent = self._match(TokenType.PERCENT) 3737 3738 self._match_set((TokenType.ROW, TokenType.ROWS)) 3739 3740 only = self._match_text_seq("ONLY") 3741 with_ties = self._match_text_seq("WITH", "TIES") 3742 3743 if 
only and with_ties: 3744 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3745 3746 return self.expression( 3747 exp.Fetch, 3748 direction=direction, 3749 count=count, 3750 percent=percent, 3751 with_ties=with_ties, 3752 ) 3753 3754 return this 3755 3756 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3757 if not self._match(TokenType.OFFSET): 3758 return this 3759 3760 count = self._parse_term() 3761 self._match_set((TokenType.ROW, TokenType.ROWS)) 3762 3763 return self.expression( 3764 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3765 ) 3766 3767 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3768 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3769 3770 def _parse_locks(self) -> t.List[exp.Lock]: 3771 locks = [] 3772 while True: 3773 if self._match_text_seq("FOR", "UPDATE"): 3774 update = True 3775 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3776 "LOCK", "IN", "SHARE", "MODE" 3777 ): 3778 update = False 3779 else: 3780 break 3781 3782 expressions = None 3783 if self._match_text_seq("OF"): 3784 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3785 3786 wait: t.Optional[bool | exp.Expression] = None 3787 if self._match_text_seq("NOWAIT"): 3788 wait = True 3789 elif self._match_text_seq("WAIT"): 3790 wait = self._parse_primary() 3791 elif self._match_text_seq("SKIP", "LOCKED"): 3792 wait = False 3793 3794 locks.append( 3795 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3796 ) 3797 3798 return locks 3799 3800 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3801 while this and self._match_set(self.SET_OPERATIONS): 3802 token_type = self._prev.token_type 3803 3804 if token_type == TokenType.UNION: 3805 operation = exp.Union 3806 elif token_type == TokenType.EXCEPT: 3807 operation = exp.Except 3808 else: 3809 operation = exp.Intersect 3810 3811 comments = self._prev.comments 3812 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3813 by_name = self._match_text_seq("BY", "NAME") 3814 expression = self._parse_select(nested=True, parse_set_operation=False) 3815 3816 this = self.expression( 3817 operation, 3818 comments=comments, 3819 this=this, 3820 distinct=distinct, 3821 by_name=by_name, 3822 expression=expression, 3823 ) 3824 3825 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3826 expression = this.expression 3827 3828 if expression: 3829 for arg in self.UNION_MODIFIERS: 3830 expr = expression.args.get(arg) 3831 if expr: 3832 this.set(arg, expr.pop()) 3833 3834 return this 3835 3836 def _parse_expression(self) -> t.Optional[exp.Expression]: 3837 return self._parse_alias(self._parse_conjunction()) 3838 3839 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3840 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3841 3842 def _parse_equality(self) -> t.Optional[exp.Expression]: 3843 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3844 3845 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3846 return self._parse_tokens(self._parse_range, self.COMPARISON) 3847 3848 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3849 this = this or self._parse_bitwise() 3850 negate = self._match(TokenType.NOT) 3851 3852 if self._match_set(self.RANGE_PARSERS): 3853 expression = 
self.RANGE_PARSERS[self._prev.token_type](self, this) 3854 if not expression: 3855 return this 3856 3857 this = expression 3858 elif self._match(TokenType.ISNULL): 3859 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3860 3861 # Postgres supports ISNULL and NOTNULL for conditions. 3862 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3863 if self._match(TokenType.NOTNULL): 3864 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3865 this = self.expression(exp.Not, this=this) 3866 3867 if negate: 3868 this = self.expression(exp.Not, this=this) 3869 3870 if self._match(TokenType.IS): 3871 this = self._parse_is(this) 3872 3873 return this 3874 3875 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3876 index = self._index - 1 3877 negate = self._match(TokenType.NOT) 3878 3879 if self._match_text_seq("DISTINCT", "FROM"): 3880 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3881 return self.expression(klass, this=this, expression=self._parse_bitwise()) 3882 3883 expression = self._parse_null() or self._parse_boolean() 3884 if not expression: 3885 self._retreat(index) 3886 return None 3887 3888 this = self.expression(exp.Is, this=this, expression=expression) 3889 return self.expression(exp.Not, this=this) if negate else this 3890 3891 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3892 unnest = self._parse_unnest(with_alias=False) 3893 if unnest: 3894 this = self.expression(exp.In, this=this, unnest=unnest) 3895 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3896 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3897 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3898 3899 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 3900 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 3901 else: 3902 this = self.expression(exp.In, this=this, expressions=expressions) 3903 3904 if matched_l_paren: 3905 self._match_r_paren(this) 3906 elif not self._match(TokenType.R_BRACKET, expression=this): 3907 self.raise_error("Expecting ]") 3908 else: 3909 this = self.expression(exp.In, this=this, field=self._parse_field()) 3910 3911 return this 3912 3913 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3914 low = self._parse_bitwise() 3915 self._match(TokenType.AND) 3916 high = self._parse_bitwise() 3917 return self.expression(exp.Between, this=this, low=low, high=high) 3918 3919 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3920 if not self._match(TokenType.ESCAPE): 3921 return this 3922 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3923 3924 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3925 index = self._index 3926 3927 if not self._match(TokenType.INTERVAL) and match_interval: 3928 return None 3929 3930 if self._match(TokenType.STRING, advance=False): 3931 this = self._parse_primary() 3932 else: 3933 this = self._parse_term() 3934 3935 if not this or ( 3936 isinstance(this, exp.Column) 3937 and not this.table 3938 and not this.this.quoted 3939 and this.name.upper() == "IS" 3940 ): 3941 self._retreat(index) 3942 return None 3943 3944 unit = self._parse_function() or ( 3945 not self._match(TokenType.ALIAS, advance=False) 3946 and self._parse_var(any_token=True, upper=True) 3947 ) 3948 3949 # Most dialects support, e.g., the form INTERVAL 
'5' day, thus we try to parse 3950 # each INTERVAL expression into this canonical form so it's easy to transpile 3951 if this and this.is_number: 3952 this = exp.Literal.string(this.name) 3953 elif this and this.is_string: 3954 parts = this.name.split() 3955 3956 if len(parts) == 2: 3957 if unit: 3958 # This is not actually a unit, it's something else (e.g. a "window side") 3959 unit = None 3960 self._retreat(self._index - 1) 3961 3962 this = exp.Literal.string(parts[0]) 3963 unit = self.expression(exp.Var, this=parts[1].upper()) 3964 3965 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 3966 unit = self.expression( 3967 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 3968 ) 3969 3970 return self.expression(exp.Interval, this=this, unit=unit) 3971 3972 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3973 this = self._parse_term() 3974 3975 while True: 3976 if self._match_set(self.BITWISE): 3977 this = self.expression( 3978 self.BITWISE[self._prev.token_type], 3979 this=this, 3980 expression=self._parse_term(), 3981 ) 3982 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3983 this = self.expression( 3984 exp.DPipe, 3985 this=this, 3986 expression=self._parse_term(), 3987 safe=not self.dialect.STRICT_STRING_CONCAT, 3988 ) 3989 elif self._match(TokenType.DQMARK): 3990 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3991 elif self._match_pair(TokenType.LT, TokenType.LT): 3992 this = self.expression( 3993 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3994 ) 3995 elif self._match_pair(TokenType.GT, TokenType.GT): 3996 this = self.expression( 3997 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3998 ) 3999 else: 4000 break 4001 4002 return this 4003 4004 def _parse_term(self) -> t.Optional[exp.Expression]: 4005 return self._parse_tokens(self._parse_factor, self.TERM) 4006 4007 def _parse_factor(self) -> t.Optional[exp.Expression]: 4008 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4009 this = parse_method() 4010 4011 while self._match_set(self.FACTOR): 4012 this = self.expression( 4013 self.FACTOR[self._prev.token_type], 4014 this=this, 4015 comments=self._prev_comments, 4016 expression=parse_method(), 4017 ) 4018 if isinstance(this, exp.Div): 4019 this.args["typed"] = self.dialect.TYPED_DIVISION 4020 this.args["safe"] = self.dialect.SAFE_DIVISION 4021 4022 return this 4023 4024 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4025 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4026 4027 def _parse_unary(self) -> t.Optional[exp.Expression]: 4028 if self._match_set(self.UNARY_PARSERS): 4029 return self.UNARY_PARSERS[self._prev.token_type](self) 4030 return self._parse_at_time_zone(self._parse_type()) 4031 4032 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 4033 interval = parse_interval and self._parse_interval() 4034 if interval: 4035 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 4036 while True: 4037 index = self._index 4038 self._match(TokenType.PLUS) 4039 4040 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4041 self._retreat(index) 4042 break 4043 4044 interval = self.expression( # type: ignore 4045 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4046 ) 4047 4048 return interval 4049 4050 index = self._index 4051 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4052 this = self._parse_column() 4053 4054 if data_type: 4055 if isinstance(this, exp.Literal): 4056 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4057 if parser: 4058 return parser(self, this, data_type) 4059 return self.expression(exp.Cast, this=this, to=data_type) 4060 if not data_type.expressions: 4061 self._retreat(index) 4062 return self._parse_column() 4063 return self._parse_column_ops(data_type) 4064 4065 return this and self._parse_column_ops(this) 4066 4067 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4068 this = self._parse_type() 4069 if not this: 4070 return None 4071 4072 if isinstance(this, exp.Column) and not this.table: 4073 this = exp.var(this.name.upper()) 4074 4075 return self.expression( 4076 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4077 ) 4078 4079 def _parse_types( 4080 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4081 ) -> t.Optional[exp.Expression]: 4082 index = self._index 4083 4084 prefix = self._match_text_seq("SYSUDTLIB", ".") 4085 4086 if not self._match_set(self.TYPE_TOKENS): 4087 identifier = allow_identifiers and self._parse_id_var( 4088 any_token=False, tokens=(TokenType.VAR,) 4089 ) 4090 if identifier: 4091 tokens = self.dialect.tokenize(identifier.name) 4092 4093 if len(tokens) != 1: 4094 self.raise_error("Unexpected identifier", self._prev) 4095 4096 if tokens[0].token_type in self.TYPE_TOKENS: 4097 self._prev = tokens[0] 4098 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4099 type_name = identifier.name 4100 4101 while self._match(TokenType.DOT): 4102 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4103 4104 return exp.DataType.build(type_name, udt=True) 4105 else: 4106 self._retreat(self._index - 1) 4107 return None 4108 else: 4109 return None 4110 4111 type_token = self._prev.token_type 4112 4113 if type_token == TokenType.PSEUDO_TYPE: 4114 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4115 4116 if type_token == TokenType.OBJECT_IDENTIFIER: 4117 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4118 4119 nested = type_token in self.NESTED_TYPE_TOKENS 4120 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4121 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4122 expressions = None 4123 maybe_func = False 4124 4125 if self._match(TokenType.L_PAREN): 4126 if is_struct: 4127 expressions = self._parse_csv(self._parse_struct_types) 4128 elif nested: 4129 expressions = self._parse_csv( 4130 lambda: self._parse_types( 4131 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4132 ) 4133 ) 4134 elif type_token in self.ENUM_TYPE_TOKENS: 4135 expressions = self._parse_csv(self._parse_equality) 4136 elif is_aggregate: 4137 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4138 any_token=False, tokens=(TokenType.VAR,) 4139 ) 4140 if not func_or_ident or not self._match(TokenType.COMMA): 4141 return None 4142 expressions = 
self._parse_csv( 4143 lambda: self._parse_types( 4144 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4145 ) 4146 ) 4147 expressions.insert(0, func_or_ident) 4148 else: 4149 expressions = self._parse_csv(self._parse_type_size) 4150 4151 if not expressions or not self._match(TokenType.R_PAREN): 4152 self._retreat(index) 4153 return None 4154 4155 maybe_func = True 4156 4157 this: t.Optional[exp.Expression] = None 4158 values: t.Optional[t.List[exp.Expression]] = None 4159 4160 if nested and self._match(TokenType.LT): 4161 if is_struct: 4162 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4163 else: 4164 expressions = self._parse_csv( 4165 lambda: self._parse_types( 4166 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4167 ) 4168 ) 4169 4170 if not self._match(TokenType.GT): 4171 self.raise_error("Expecting >") 4172 4173 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4174 values = self._parse_csv(self._parse_conjunction) 4175 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4176 4177 if type_token in self.TIMESTAMPS: 4178 if self._match_text_seq("WITH", "TIME", "ZONE"): 4179 maybe_func = False 4180 tz_type = ( 4181 exp.DataType.Type.TIMETZ 4182 if type_token in self.TIMES 4183 else exp.DataType.Type.TIMESTAMPTZ 4184 ) 4185 this = exp.DataType(this=tz_type, expressions=expressions) 4186 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4187 maybe_func = False 4188 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4189 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4190 maybe_func = False 4191 elif type_token == TokenType.INTERVAL: 4192 unit = self._parse_var(upper=True) 4193 if unit: 4194 if self._match_text_seq("TO"): 4195 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4196 4197 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4198 else: 4199 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4200 4201 if maybe_func and check_func: 4202 index2 = self._index 4203 peek = self._parse_string() 4204 4205 if not peek: 4206 self._retreat(index) 4207 return None 4208 4209 self._retreat(index2) 4210 4211 if not this: 4212 if self._match_text_seq("UNSIGNED"): 4213 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4214 if not unsigned_type_token: 4215 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4216 4217 type_token = unsigned_type_token or type_token 4218 4219 this = exp.DataType( 4220 this=exp.DataType.Type[type_token.value], 4221 expressions=expressions, 4222 nested=nested, 4223 values=values, 4224 prefix=prefix, 4225 ) 4226 4227 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 4228 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 4229 4230 return this 4231 4232 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4233 index = self._index 4234 this = self._parse_type(parse_interval=False) or self._parse_id_var() 4235 self._match(TokenType.COLON) 4236 column_def = self._parse_column_def(this) 4237 4238 if type_required and ( 4239 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 4240 ): 4241 self._retreat(index) 4242 return self._parse_types() 4243 4244 return column_def 4245 4246 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4247 if not 
self._match_text_seq("AT", "TIME", "ZONE"): 4248 return this 4249 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4250 4251 def _parse_column(self) -> t.Optional[exp.Expression]: 4252 this = self._parse_column_reference() 4253 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4254 4255 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4256 this = self._parse_field() 4257 if ( 4258 not this 4259 and self._match(TokenType.VALUES, advance=False) 4260 and self.VALUES_FOLLOWED_BY_PAREN 4261 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4262 ): 4263 this = self._parse_id_var() 4264 4265 if isinstance(this, exp.Identifier): 4266 # We bubble up comments from the Identifier to the Column 4267 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4268 4269 return this 4270 4271 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4272 this = self._parse_bracket(this) 4273 4274 while self._match_set(self.COLUMN_OPERATORS): 4275 op_token = self._prev.token_type 4276 op = self.COLUMN_OPERATORS.get(op_token) 4277 4278 if op_token == TokenType.DCOLON: 4279 field = self._parse_types() 4280 if not field: 4281 self.raise_error("Expected type") 4282 elif op and self._curr: 4283 field = self._parse_column_reference() 4284 else: 4285 field = self._parse_field(any_token=True, anonymous_func=True) 4286 4287 if isinstance(field, exp.Func) and this: 4288 # bigquery allows function calls like x.y.count(...) 4289 # SAFE.SUBSTR(...) 4290 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4291 this = exp.replace_tree( 4292 this, 4293 lambda n: ( 4294 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4295 if n.table 4296 else n.this 4297 ) 4298 if isinstance(n, exp.Column) 4299 else n, 4300 ) 4301 4302 if op: 4303 this = op(self, this, field) 4304 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4305 this = self.expression( 4306 exp.Column, 4307 this=field, 4308 table=this.this, 4309 db=this.args.get("table"), 4310 catalog=this.args.get("db"), 4311 ) 4312 else: 4313 this = self.expression(exp.Dot, this=this, expression=field) 4314 this = self._parse_bracket(this) 4315 return this 4316 4317 def _parse_primary(self) -> t.Optional[exp.Expression]: 4318 if self._match_set(self.PRIMARY_PARSERS): 4319 token_type = self._prev.token_type 4320 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4321 4322 if token_type == TokenType.STRING: 4323 expressions = [primary] 4324 while self._match(TokenType.STRING): 4325 expressions.append(exp.Literal.string(self._prev.text)) 4326 4327 if len(expressions) > 1: 4328 return self.expression(exp.Concat, expressions=expressions) 4329 4330 return primary 4331 4332 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4333 return exp.Literal.number(f"0.{self._prev.text}") 4334 4335 if self._match(TokenType.L_PAREN): 4336 comments = self._prev_comments 4337 query = self._parse_select() 4338 4339 if query: 4340 expressions = [query] 4341 else: 4342 expressions = self._parse_expressions() 4343 4344 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4345 4346 if not this and self._match(TokenType.R_PAREN, advance=False): 4347 this = self.expression(exp.Tuple) 4348 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4349 this = self._parse_set_operations( 4350 self._parse_subquery(this=this, parse_alias=False) 4351 ) 4352 elif isinstance(this, 
exp.Subquery): 4353 this = self._parse_subquery( 4354 this=self._parse_set_operations(this), parse_alias=False 4355 ) 4356 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4357 this = self.expression(exp.Tuple, expressions=expressions) 4358 else: 4359 this = self.expression(exp.Paren, this=this) 4360 4361 if this: 4362 this.add_comments(comments) 4363 4364 self._match_r_paren(expression=this) 4365 return this 4366 4367 return None 4368 4369 def _parse_field( 4370 self, 4371 any_token: bool = False, 4372 tokens: t.Optional[t.Collection[TokenType]] = None, 4373 anonymous_func: bool = False, 4374 ) -> t.Optional[exp.Expression]: 4375 if anonymous_func: 4376 field = ( 4377 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4378 or self._parse_primary() 4379 ) 4380 else: 4381 field = self._parse_primary() or self._parse_function( 4382 anonymous=anonymous_func, any_token=any_token 4383 ) 4384 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4385 4386 def _parse_function( 4387 self, 4388 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4389 anonymous: bool = False, 4390 optional_parens: bool = True, 4391 any_token: bool = False, 4392 ) -> t.Optional[exp.Expression]: 4393 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4394 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4395 fn_syntax = False 4396 if ( 4397 self._match(TokenType.L_BRACE, advance=False) 4398 and self._next 4399 and self._next.text.upper() == "FN" 4400 ): 4401 self._advance(2) 4402 fn_syntax = True 4403 4404 func = self._parse_function_call( 4405 functions=functions, 4406 anonymous=anonymous, 4407 optional_parens=optional_parens, 4408 any_token=any_token, 4409 ) 4410 4411 if fn_syntax: 4412 self._match(TokenType.R_BRACE) 4413 4414 return func 4415 4416 def _parse_function_call( 4417 self, 4418 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4419 anonymous: bool = False, 4420 optional_parens: bool = True, 4421 any_token: bool = False, 4422 ) -> t.Optional[exp.Expression]: 4423 if not self._curr: 4424 return None 4425 4426 comments = self._curr.comments 4427 token_type = self._curr.token_type 4428 this = self._curr.text 4429 upper = this.upper() 4430 4431 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4432 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4433 self._advance() 4434 return self._parse_window(parser(self)) 4435 4436 if not self._next or self._next.token_type != TokenType.L_PAREN: 4437 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4438 self._advance() 4439 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4440 4441 return None 4442 4443 if any_token: 4444 if token_type in self.RESERVED_TOKENS: 4445 return None 4446 elif token_type not in self.FUNC_TOKENS: 4447 return None 4448 4449 self._advance(2) 4450 4451 parser = self.FUNCTION_PARSERS.get(upper) 4452 if parser and not anonymous: 4453 this = parser(self) 4454 else: 4455 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4456 4457 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4458 this = self.expression(subquery_predicate, this=self._parse_select()) 4459 self._match_r_paren() 4460 return this 4461 4462 if functions is None: 4463 functions = self.FUNCTIONS 4464 4465 function = functions.get(upper) 4466 4467 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4468 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 
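# A minimal sketch of the dispatch happening at this point, assuming the
# public sqlglot.parse_one entry point (hypothetical REPL session; exact
# reprs vary by version): names registered in FUNCTIONS are built by their
# builder, which receives the dialect when it declares a `dialect` parameter,
# and the result is validated below; unrecognized names fall through to
# exp.Anonymous.
#
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> isinstance(sqlglot.parse_one("COALESCE(x, 1)"), exp.Coalesce)
#   True
#   >>> isinstance(sqlglot.parse_one("SOME_UDF(1, 2)"), exp.Anonymous)
#   True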
4469 4470 if alias: 4471 args = self._kv_to_prop_eq(args) 4472 4473 if function and not anonymous: 4474 if "dialect" in function.__code__.co_varnames: 4475 func = function(args, dialect=self.dialect) 4476 else: 4477 func = function(args) 4478 4479 func = self.validate_expression(func, args) 4480 if not self.dialect.NORMALIZE_FUNCTIONS: 4481 func.meta["name"] = this 4482 4483 this = func 4484 else: 4485 if token_type == TokenType.IDENTIFIER: 4486 this = exp.Identifier(this=this, quoted=True) 4487 this = self.expression(exp.Anonymous, this=this, expressions=args) 4488 4489 if isinstance(this, exp.Expression): 4490 this.add_comments(comments) 4491 4492 self._match_r_paren(this) 4493 return self._parse_window(this) 4494 4495 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4496 transformed = [] 4497 4498 for e in expressions: 4499 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4500 if isinstance(e, exp.Alias): 4501 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4502 4503 if not isinstance(e, exp.PropertyEQ): 4504 e = self.expression( 4505 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4506 ) 4507 4508 if isinstance(e.this, exp.Column): 4509 e.this.replace(e.this.this) 4510 4511 transformed.append(e) 4512 4513 return transformed 4514 4515 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4516 return self._parse_column_def(self._parse_id_var()) 4517 4518 def _parse_user_defined_function( 4519 self, kind: t.Optional[TokenType] = None 4520 ) -> t.Optional[exp.Expression]: 4521 this = self._parse_id_var() 4522 4523 while self._match(TokenType.DOT): 4524 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4525 4526 if not self._match(TokenType.L_PAREN): 4527 return this 4528 4529 expressions = self._parse_csv(self._parse_function_parameter) 4530 self._match_r_paren() 4531 return self.expression( 4532 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4533 ) 4534 4535 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4536 literal = self._parse_primary() 4537 if literal: 4538 return self.expression(exp.Introducer, this=token.text, expression=literal) 4539 4540 return self.expression(exp.Identifier, this=token.text) 4541 4542 def _parse_session_parameter(self) -> exp.SessionParameter: 4543 kind = None 4544 this = self._parse_id_var() or self._parse_primary() 4545 4546 if this and self._match(TokenType.DOT): 4547 kind = this.name 4548 this = self._parse_var() or self._parse_primary() 4549 4550 return self.expression(exp.SessionParameter, this=this, kind=kind) 4551 4552 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4553 index = self._index 4554 4555 if self._match(TokenType.L_PAREN): 4556 expressions = t.cast( 4557 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4558 ) 4559 4560 if not self._match(TokenType.R_PAREN): 4561 self._retreat(index) 4562 else: 4563 expressions = [self._parse_id_var()] 4564 4565 if self._match_set(self.LAMBDAS): 4566 return self.LAMBDAS[self._prev.token_type](self, expressions) 4567 4568 self._retreat(index) 4569 4570 this: t.Optional[exp.Expression] 4571 4572 if self._match(TokenType.DISTINCT): 4573 this = self.expression( 4574 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4575 ) 4576 else: 4577 this = self._parse_select_or_expression(alias=alias) 4578 4579 return self._parse_limit( 4580 
self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4581 ) 4582 4583 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4584 index = self._index 4585 if not self._match(TokenType.L_PAREN): 4586 return this 4587 4588 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 4589 # expr can be of both types 4590 if self._match_set(self.SELECT_START_TOKENS): 4591 self._retreat(index) 4592 return this 4593 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4594 self._match_r_paren() 4595 return self.expression(exp.Schema, this=this, expressions=args) 4596 4597 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4598 return self._parse_column_def(self._parse_field(any_token=True)) 4599 4600 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4601 # column defs are not really columns, they're identifiers 4602 if isinstance(this, exp.Column): 4603 this = this.this 4604 4605 kind = self._parse_types(schema=True) 4606 4607 if self._match_text_seq("FOR", "ORDINALITY"): 4608 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4609 4610 constraints: t.List[exp.Expression] = [] 4611 4612 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4613 ("ALIAS", "MATERIALIZED") 4614 ): 4615 persisted = self._prev.text.upper() == "MATERIALIZED" 4616 constraints.append( 4617 self.expression( 4618 exp.ComputedColumnConstraint, 4619 this=self._parse_conjunction(), 4620 persisted=persisted or self._match_text_seq("PERSISTED"), 4621 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4622 ) 4623 ) 4624 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4625 self._match(TokenType.ALIAS) 4626 constraints.append( 4627 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4628 ) 4629 4630 while True: 4631 constraint = self._parse_column_constraint() 4632 if not constraint: 4633 break 4634 constraints.append(constraint) 4635 4636 if not kind and not constraints: 4637 return this 4638 4639 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4640 4641 def _parse_auto_increment( 4642 self, 4643 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4644 start = None 4645 increment = None 4646 4647 if self._match(TokenType.L_PAREN, advance=False): 4648 args = self._parse_wrapped_csv(self._parse_bitwise) 4649 start = seq_get(args, 0) 4650 increment = seq_get(args, 1) 4651 elif self._match_text_seq("START"): 4652 start = self._parse_bitwise() 4653 self._match_text_seq("INCREMENT") 4654 increment = self._parse_bitwise() 4655 4656 if start and increment: 4657 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4658 4659 return exp.AutoIncrementColumnConstraint() 4660 4661 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4662 if not self._match_text_seq("REFRESH"): 4663 self._retreat(self._index - 1) 4664 return None 4665 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4666 4667 def _parse_compress(self) -> exp.CompressColumnConstraint: 4668 if self._match(TokenType.L_PAREN, advance=False): 4669 return self.expression( 4670 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4671 ) 4672 4673 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4674 4675 def 
_parse_generated_as_identity( 4676 self, 4677 ) -> ( 4678 exp.GeneratedAsIdentityColumnConstraint 4679 | exp.ComputedColumnConstraint 4680 | exp.GeneratedAsRowColumnConstraint 4681 ): 4682 if self._match_text_seq("BY", "DEFAULT"): 4683 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4684 this = self.expression( 4685 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4686 ) 4687 else: 4688 self._match_text_seq("ALWAYS") 4689 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4690 4691 self._match(TokenType.ALIAS) 4692 4693 if self._match_text_seq("ROW"): 4694 start = self._match_text_seq("START") 4695 if not start: 4696 self._match(TokenType.END) 4697 hidden = self._match_text_seq("HIDDEN") 4698 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4699 4700 identity = self._match_text_seq("IDENTITY") 4701 4702 if self._match(TokenType.L_PAREN): 4703 if self._match(TokenType.START_WITH): 4704 this.set("start", self._parse_bitwise()) 4705 if self._match_text_seq("INCREMENT", "BY"): 4706 this.set("increment", self._parse_bitwise()) 4707 if self._match_text_seq("MINVALUE"): 4708 this.set("minvalue", self._parse_bitwise()) 4709 if self._match_text_seq("MAXVALUE"): 4710 this.set("maxvalue", self._parse_bitwise()) 4711 4712 if self._match_text_seq("CYCLE"): 4713 this.set("cycle", True) 4714 elif self._match_text_seq("NO", "CYCLE"): 4715 this.set("cycle", False) 4716 4717 if not identity: 4718 this.set("expression", self._parse_range()) 4719 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4720 args = self._parse_csv(self._parse_bitwise) 4721 this.set("start", seq_get(args, 0)) 4722 this.set("increment", seq_get(args, 1)) 4723 4724 self._match_r_paren() 4725 4726 return this 4727 4728 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4729 self._match_text_seq("LENGTH") 4730 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4731 4732 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4733 if self._match_text_seq("NULL"): 4734 return self.expression(exp.NotNullColumnConstraint) 4735 if self._match_text_seq("CASESPECIFIC"): 4736 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4737 if self._match_text_seq("FOR", "REPLICATION"): 4738 return self.expression(exp.NotForReplicationColumnConstraint) 4739 return None 4740 4741 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4742 if self._match(TokenType.CONSTRAINT): 4743 this = self._parse_id_var() 4744 else: 4745 this = None 4746 4747 if self._match_texts(self.CONSTRAINT_PARSERS): 4748 return self.expression( 4749 exp.ColumnConstraint, 4750 this=this, 4751 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4752 ) 4753 4754 return this 4755 4756 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4757 if not self._match(TokenType.CONSTRAINT): 4758 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4759 4760 return self.expression( 4761 exp.Constraint, 4762 this=self._parse_id_var(), 4763 expressions=self._parse_unnamed_constraints(), 4764 ) 4765 4766 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 4767 constraints = [] 4768 while True: 4769 constraint = self._parse_unnamed_constraint() or self._parse_function() 4770 if not constraint: 4771 break 4772 constraints.append(constraint) 4773 4774 return constraints 4775 4776 def _parse_unnamed_constraint( 4777 self, constraints: 
t.Optional[t.Collection[str]] = None 4778 ) -> t.Optional[exp.Expression]: 4779 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4780 constraints or self.CONSTRAINT_PARSERS 4781 ): 4782 return None 4783 4784 constraint = self._prev.text.upper() 4785 if constraint not in self.CONSTRAINT_PARSERS: 4786 self.raise_error(f"No parser found for schema constraint {constraint}.") 4787 4788 return self.CONSTRAINT_PARSERS[constraint](self) 4789 4790 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4791 self._match_text_seq("KEY") 4792 return self.expression( 4793 exp.UniqueColumnConstraint, 4794 this=self._parse_schema(self._parse_id_var(any_token=False)), 4795 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4796 on_conflict=self._parse_on_conflict(), 4797 ) 4798 4799 def _parse_key_constraint_options(self) -> t.List[str]: 4800 options = [] 4801 while True: 4802 if not self._curr: 4803 break 4804 4805 if self._match(TokenType.ON): 4806 action = None 4807 on = self._advance_any() and self._prev.text 4808 4809 if self._match_text_seq("NO", "ACTION"): 4810 action = "NO ACTION" 4811 elif self._match_text_seq("CASCADE"): 4812 action = "CASCADE" 4813 elif self._match_text_seq("RESTRICT"): 4814 action = "RESTRICT" 4815 elif self._match_pair(TokenType.SET, TokenType.NULL): 4816 action = "SET NULL" 4817 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4818 action = "SET DEFAULT" 4819 else: 4820 self.raise_error("Invalid key constraint") 4821 4822 options.append(f"ON {on} {action}") 4823 elif self._match_text_seq("NOT", "ENFORCED"): 4824 options.append("NOT ENFORCED") 4825 elif self._match_text_seq("DEFERRABLE"): 4826 options.append("DEFERRABLE") 4827 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4828 options.append("INITIALLY DEFERRED") 4829 elif self._match_text_seq("NORELY"): 4830 options.append("NORELY") 4831 elif self._match_text_seq("MATCH", "FULL"): 4832 options.append("MATCH FULL") 4833 else: 4834 break 4835 4836 return options 4837 4838 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4839 if match and not self._match(TokenType.REFERENCES): 4840 return None 4841 4842 expressions = None 4843 this = self._parse_table(schema=True) 4844 options = self._parse_key_constraint_options() 4845 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4846 4847 def _parse_foreign_key(self) -> exp.ForeignKey: 4848 expressions = self._parse_wrapped_id_vars() 4849 reference = self._parse_references() 4850 options = {} 4851 4852 while self._match(TokenType.ON): 4853 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4854 self.raise_error("Expected DELETE or UPDATE") 4855 4856 kind = self._prev.text.lower() 4857 4858 if self._match_text_seq("NO", "ACTION"): 4859 action = "NO ACTION" 4860 elif self._match(TokenType.SET): 4861 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4862 action = "SET " + self._prev.text.upper() 4863 else: 4864 self._advance() 4865 action = self._prev.text.upper() 4866 4867 options[kind] = action 4868 4869 return self.expression( 4870 exp.ForeignKey, 4871 expressions=expressions, 4872 reference=reference, 4873 **options, # type: ignore 4874 ) 4875 4876 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4877 return self._parse_field() 4878 4879 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4880 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4881 self._retreat(self._index - 
1) 4882 return None 4883 4884 id_vars = self._parse_wrapped_id_vars() 4885 return self.expression( 4886 exp.PeriodForSystemTimeConstraint, 4887 this=seq_get(id_vars, 0), 4888 expression=seq_get(id_vars, 1), 4889 ) 4890 4891 def _parse_primary_key( 4892 self, wrapped_optional: bool = False, in_props: bool = False 4893 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4894 desc = ( 4895 self._match_set((TokenType.ASC, TokenType.DESC)) 4896 and self._prev.token_type == TokenType.DESC 4897 ) 4898 4899 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4900 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4901 4902 expressions = self._parse_wrapped_csv( 4903 self._parse_primary_key_part, optional=wrapped_optional 4904 ) 4905 options = self._parse_key_constraint_options() 4906 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4907 4908 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4909 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4910 4911 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4912 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4913 return this 4914 4915 bracket_kind = self._prev.token_type 4916 expressions = self._parse_csv( 4917 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4918 ) 4919 4920 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 4921 self.raise_error("Expected ]") 4922 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 4923 self.raise_error("Expected }") 4924 4925 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4926 if bracket_kind == TokenType.L_BRACE: 4927 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 4928 elif not this or this.name.upper() == "ARRAY": 4929 this = self.expression(exp.Array, expressions=expressions) 4930 else: 4931 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4932 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4933 4934 self._add_comments(this) 4935 return self._parse_bracket(this) 4936 4937 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4938 if self._match(TokenType.COLON): 4939 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4940 return this 4941 4942 def _parse_case(self) -> t.Optional[exp.Expression]: 4943 ifs = [] 4944 default = None 4945 4946 comments = self._prev_comments 4947 expression = self._parse_conjunction() 4948 4949 while self._match(TokenType.WHEN): 4950 this = self._parse_conjunction() 4951 self._match(TokenType.THEN) 4952 then = self._parse_conjunction() 4953 ifs.append(self.expression(exp.If, this=this, true=then)) 4954 4955 if self._match(TokenType.ELSE): 4956 default = self._parse_conjunction() 4957 4958 if not self._match(TokenType.END): 4959 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4960 default = exp.column("interval") 4961 else: 4962 self.raise_error("Expected END after CASE", self._prev) 4963 4964 return self.expression( 4965 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 4966 ) 4967 4968 def _parse_if(self) -> t.Optional[exp.Expression]: 4969 if self._match(TokenType.L_PAREN): 4970 args = self._parse_csv(self._parse_conjunction) 4971 this = 
self.validate_expression(exp.If.from_arg_list(args), args) 4972 self._match_r_paren() 4973 else: 4974 index = self._index - 1 4975 4976 if self.NO_PAREN_IF_COMMANDS and index == 0: 4977 return self._parse_as_command(self._prev) 4978 4979 condition = self._parse_conjunction() 4980 4981 if not condition: 4982 self._retreat(index) 4983 return None 4984 4985 self._match(TokenType.THEN) 4986 true = self._parse_conjunction() 4987 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4988 self._match(TokenType.END) 4989 this = self.expression(exp.If, this=condition, true=true, false=false) 4990 4991 return this 4992 4993 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4994 if not self._match_text_seq("VALUE", "FOR"): 4995 self._retreat(self._index - 1) 4996 return None 4997 4998 return self.expression( 4999 exp.NextValueFor, 5000 this=self._parse_column(), 5001 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5002 ) 5003 5004 def _parse_extract(self) -> exp.Extract: 5005 this = self._parse_function() or self._parse_var() or self._parse_type() 5006 5007 if self._match(TokenType.FROM): 5008 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5009 5010 if not self._match(TokenType.COMMA): 5011 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5012 5013 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5014 5015 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5016 this = self._parse_conjunction() 5017 5018 if not self._match(TokenType.ALIAS): 5019 if self._match(TokenType.COMMA): 5020 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5021 5022 self.raise_error("Expected AS after CAST") 5023 5024 fmt = None 5025 to = self._parse_types() 5026 5027 if self._match(TokenType.FORMAT): 5028 fmt_string = self._parse_string() 5029 fmt = self._parse_at_time_zone(fmt_string) 5030 5031 if not to: 5032 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5033 if to.this in exp.DataType.TEMPORAL_TYPES: 5034 this = self.expression( 5035 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5036 this=this, 5037 format=exp.Literal.string( 5038 format_time( 5039 fmt_string.this if fmt_string else "", 5040 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5041 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5042 ) 5043 ), 5044 ) 5045 5046 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5047 this.set("zone", fmt.args["zone"]) 5048 return this 5049 elif not to: 5050 self.raise_error("Expected TYPE after CAST") 5051 elif isinstance(to, exp.Identifier): 5052 to = exp.DataType.build(to.name, udt=True) 5053 elif to.this == exp.DataType.Type.CHAR: 5054 if self._match(TokenType.CHARACTER_SET): 5055 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5056 5057 return self.expression( 5058 exp.Cast if strict else exp.TryCast, 5059 this=this, 5060 to=to, 5061 format=fmt, 5062 safe=safe, 5063 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5064 ) 5065 5066 def _parse_string_agg(self) -> exp.Expression: 5067 if self._match(TokenType.DISTINCT): 5068 args: t.List[t.Optional[exp.Expression]] = [ 5069 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 5070 ] 5071 if self._match(TokenType.COMMA): 5072 args.extend(self._parse_csv(self._parse_conjunction)) 5073 else: 5074 args = 
self._parse_csv(self._parse_conjunction) # type: ignore 5075 5076 index = self._index 5077 if not self._match(TokenType.R_PAREN) and args: 5078 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5079 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5080 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5081 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5082 5083 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5084 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5085 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5086 if not self._match_text_seq("WITHIN", "GROUP"): 5087 self._retreat(index) 5088 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5089 5090 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5091 order = self._parse_order(this=seq_get(args, 0)) 5092 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5093 5094 def _parse_convert( 5095 self, strict: bool, safe: t.Optional[bool] = None 5096 ) -> t.Optional[exp.Expression]: 5097 this = self._parse_bitwise() 5098 5099 if self._match(TokenType.USING): 5100 to: t.Optional[exp.Expression] = self.expression( 5101 exp.CharacterSet, this=self._parse_var() 5102 ) 5103 elif self._match(TokenType.COMMA): 5104 to = self._parse_types() 5105 else: 5106 to = None 5107 5108 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5109 5110 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5111 """ 5112 There are generally two variants of the DECODE function: 5113 5114 - DECODE(bin, charset) 5115 - DECODE(expression, search, result [, search, result] ... [, default]) 5116 5117 The second variant will always be parsed into a CASE expression. Note that NULL 5118 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5119 instead of relying on pattern matching. 
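For example, DECODE(x, 1, 'one', 'other') is parsed as the equivalent of
CASE WHEN x = 1 THEN 'one' ELSE 'other' END: a literal search value becomes a
plain equality check, a NULL search becomes an explicit x IS NULL check, and
any other search value s is matched with x = s OR (x IS NULL AND s IS NULL).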
5120 """ 5121 args = self._parse_csv(self._parse_conjunction) 5122 5123 if len(args) < 3: 5124 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5125 5126 expression, *expressions = args 5127 if not expression: 5128 return None 5129 5130 ifs = [] 5131 for search, result in zip(expressions[::2], expressions[1::2]): 5132 if not search or not result: 5133 return None 5134 5135 if isinstance(search, exp.Literal): 5136 ifs.append( 5137 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5138 ) 5139 elif isinstance(search, exp.Null): 5140 ifs.append( 5141 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5142 ) 5143 else: 5144 cond = exp.or_( 5145 exp.EQ(this=expression.copy(), expression=search), 5146 exp.and_( 5147 exp.Is(this=expression.copy(), expression=exp.Null()), 5148 exp.Is(this=search.copy(), expression=exp.Null()), 5149 copy=False, 5150 ), 5151 copy=False, 5152 ) 5153 ifs.append(exp.If(this=cond, true=result)) 5154 5155 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5156 5157 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5158 self._match_text_seq("KEY") 5159 key = self._parse_column() 5160 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5161 self._match_text_seq("VALUE") 5162 value = self._parse_bitwise() 5163 5164 if not key and not value: 5165 return None 5166 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5167 5168 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5169 if not this or not self._match_text_seq("FORMAT", "JSON"): 5170 return this 5171 5172 return self.expression(exp.FormatJson, this=this) 5173 5174 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5175 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5176 for value in values: 5177 if self._match_text_seq(value, "ON", on): 5178 return f"{value} ON {on}" 5179 5180 return None 5181 5182 @t.overload 5183 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5184 5185 @t.overload 5186 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
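# A rough sketch of what the implementation below accepts, assuming the
# standard SQL JSON_OBJECT syntax reaches it via FUNCTION_PARSERS
# (illustrative only; exact syntax support varies by dialect and version):
#
#   JSON_OBJECT(KEY 'a' VALUE 1 ABSENT ON NULL WITH UNIQUE KEYS)
#
# would yield an exp.JSONObject whose expressions hold the KEY/VALUE pair,
# with null_handling="ABSENT ON NULL" and unique_keys=True.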
5187 5188 def _parse_json_object(self, agg=False): 5189 star = self._parse_star() 5190 expressions = ( 5191 [star] 5192 if star 5193 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5194 ) 5195 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5196 5197 unique_keys = None 5198 if self._match_text_seq("WITH", "UNIQUE"): 5199 unique_keys = True 5200 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5201 unique_keys = False 5202 5203 self._match_text_seq("KEYS") 5204 5205 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5206 self._parse_type() 5207 ) 5208 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5209 5210 return self.expression( 5211 exp.JSONObjectAgg if agg else exp.JSONObject, 5212 expressions=expressions, 5213 null_handling=null_handling, 5214 unique_keys=unique_keys, 5215 return_type=return_type, 5216 encoding=encoding, 5217 ) 5218 5219 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5220 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5221 if not self._match_text_seq("NESTED"): 5222 this = self._parse_id_var() 5223 kind = self._parse_types(allow_identifiers=False) 5224 nested = None 5225 else: 5226 this = None 5227 kind = None 5228 nested = True 5229 5230 path = self._match_text_seq("PATH") and self._parse_string() 5231 nested_schema = nested and self._parse_json_schema() 5232 5233 return self.expression( 5234 exp.JSONColumnDef, 5235 this=this, 5236 kind=kind, 5237 path=path, 5238 nested_schema=nested_schema, 5239 ) 5240 5241 def _parse_json_schema(self) -> exp.JSONSchema: 5242 self._match_text_seq("COLUMNS") 5243 return self.expression( 5244 exp.JSONSchema, 5245 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5246 ) 5247 5248 def _parse_json_table(self) -> exp.JSONTable: 5249 this = self._parse_format_json(self._parse_bitwise()) 5250 path = self._match(TokenType.COMMA) and self._parse_string() 5251 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5252 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5253 schema = self._parse_json_schema() 5254 5255 return exp.JSONTable( 5256 this=this, 5257 schema=schema, 5258 path=path, 5259 error_handling=error_handling, 5260 empty_handling=empty_handling, 5261 ) 5262 5263 def _parse_match_against(self) -> exp.MatchAgainst: 5264 expressions = self._parse_csv(self._parse_column) 5265 5266 self._match_text_seq(")", "AGAINST", "(") 5267 5268 this = self._parse_string() 5269 5270 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5271 modifier = "IN NATURAL LANGUAGE MODE" 5272 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5273 modifier = f"{modifier} WITH QUERY EXPANSION" 5274 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5275 modifier = "IN BOOLEAN MODE" 5276 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5277 modifier = "WITH QUERY EXPANSION" 5278 else: 5279 modifier = None 5280 5281 return self.expression( 5282 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5283 ) 5284 5285 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5286 def _parse_open_json(self) -> exp.OpenJSON: 5287 this = self._parse_bitwise() 5288 path = self._match(TokenType.COMMA) and self._parse_string() 5289 5290 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5291 this = self._parse_field(any_token=True) 5292 kind = self._parse_types() 5293 path = 
self._parse_string() 5294 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5295 5296 return self.expression( 5297 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5298 ) 5299 5300 expressions = None 5301 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5302 self._match_l_paren() 5303 expressions = self._parse_csv(_parse_open_json_column_def) 5304 5305 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5306 5307 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5308 args = self._parse_csv(self._parse_bitwise) 5309 5310 if self._match(TokenType.IN): 5311 return self.expression( 5312 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5313 ) 5314 5315 if haystack_first: 5316 haystack = seq_get(args, 0) 5317 needle = seq_get(args, 1) 5318 else: 5319 needle = seq_get(args, 0) 5320 haystack = seq_get(args, 1) 5321 5322 return self.expression( 5323 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5324 ) 5325 5326 def _parse_predict(self) -> exp.Predict: 5327 self._match_text_seq("MODEL") 5328 this = self._parse_table() 5329 5330 self._match(TokenType.COMMA) 5331 self._match_text_seq("TABLE") 5332 5333 return self.expression( 5334 exp.Predict, 5335 this=this, 5336 expression=self._parse_table(), 5337 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5338 ) 5339 5340 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5341 args = self._parse_csv(self._parse_table) 5342 return exp.JoinHint(this=func_name.upper(), expressions=args) 5343 5344 def _parse_substring(self) -> exp.Substring: 5345 # Postgres supports the form: substring(string [from int] [for int]) 5346 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5347 5348 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5349 5350 if self._match(TokenType.FROM): 5351 args.append(self._parse_bitwise()) 5352 if self._match(TokenType.FOR): 5353 if len(args) == 1: 5354 args.append(exp.Literal.number(1)) 5355 args.append(self._parse_bitwise()) 5356 5357 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5358 5359 def _parse_trim(self) -> exp.Trim: 5360 # https://www.w3resource.com/sql/character-functions/trim.php 5361 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5362 5363 position = None 5364 collation = None 5365 expression = None 5366 5367 if self._match_texts(self.TRIM_TYPES): 5368 position = self._prev.text.upper() 5369 5370 this = self._parse_bitwise() 5371 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5372 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5373 expression = self._parse_bitwise() 5374 5375 if invert_order: 5376 this, expression = expression, this 5377 5378 if self._match(TokenType.COLLATE): 5379 collation = self._parse_bitwise() 5380 5381 return self.expression( 5382 exp.Trim, this=this, position=position, expression=expression, collation=collation 5383 ) 5384 5385 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5386 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5387 5388 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5389 return self._parse_window(self._parse_id_var(), alias=True) 5390 5391 def _parse_respect_or_ignore_nulls( 5392 self, this: t.Optional[exp.Expression] 5393 ) -> t.Optional[exp.Expression]: 5394 if self._match_text_seq("IGNORE", "NULLS"): 
5395 return self.expression(exp.IgnoreNulls, this=this) 5396 if self._match_text_seq("RESPECT", "NULLS"): 5397 return self.expression(exp.RespectNulls, this=this) 5398 return this 5399 5400 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5401 if self._match(TokenType.HAVING): 5402 self._match_texts(("MAX", "MIN")) 5403 max = self._prev.text.upper() != "MIN" 5404 return self.expression( 5405 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5406 ) 5407 5408 return this 5409 5410 def _parse_window( 5411 self, this: t.Optional[exp.Expression], alias: bool = False 5412 ) -> t.Optional[exp.Expression]: 5413 func = this 5414 comments = func.comments if isinstance(func, exp.Expression) else None 5415 5416 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5417 self._match(TokenType.WHERE) 5418 this = self.expression( 5419 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5420 ) 5421 self._match_r_paren() 5422 5423 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5424 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5425 if self._match_text_seq("WITHIN", "GROUP"): 5426 order = self._parse_wrapped(self._parse_order) 5427 this = self.expression(exp.WithinGroup, this=this, expression=order) 5428 5429 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5430 # Some dialects choose to implement and some do not. 5431 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5432 5433 # There is some code above in _parse_lambda that handles 5434 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5435 5436 # The below changes handle 5437 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5438 5439 # Oracle allows both formats 5440 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5441 # and Snowflake chose to do the same for familiarity 5442 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5443 if isinstance(this, exp.AggFunc): 5444 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5445 5446 if ignore_respect and ignore_respect is not this: 5447 ignore_respect.replace(ignore_respect.this) 5448 this = self.expression(ignore_respect.__class__, this=this) 5449 5450 this = self._parse_respect_or_ignore_nulls(this) 5451 5452 # bigquery select from window x AS (partition by ...) 
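# For example (illustrative), both of the following end up in the branches
# below:
#   SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y)
#     -- the named window is parsed via _parse_named_window with alias=True
#   SELECT SUM(x) OVER (PARTITION BY y ORDER BY z) FROM t
#     -- the inline spec requires a WINDOW_BEFORE_PAREN_TOKENS match (OVER)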
5453 if alias: 5454 over = None 5455 self._match(TokenType.ALIAS) 5456 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5457 return this 5458 else: 5459 over = self._prev.text.upper() 5460 5461 if comments and isinstance(func, exp.Expression): 5462 func.pop_comments() 5463 5464 if not self._match(TokenType.L_PAREN): 5465 return self.expression( 5466 exp.Window, 5467 comments=comments, 5468 this=this, 5469 alias=self._parse_id_var(False), 5470 over=over, 5471 ) 5472 5473 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5474 5475 first = self._match(TokenType.FIRST) 5476 if self._match_text_seq("LAST"): 5477 first = False 5478 5479 partition, order = self._parse_partition_and_order() 5480 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5481 5482 if kind: 5483 self._match(TokenType.BETWEEN) 5484 start = self._parse_window_spec() 5485 self._match(TokenType.AND) 5486 end = self._parse_window_spec() 5487 5488 spec = self.expression( 5489 exp.WindowSpec, 5490 kind=kind, 5491 start=start["value"], 5492 start_side=start["side"], 5493 end=end["value"], 5494 end_side=end["side"], 5495 ) 5496 else: 5497 spec = None 5498 5499 self._match_r_paren() 5500 5501 window = self.expression( 5502 exp.Window, 5503 comments=comments, 5504 this=this, 5505 partition_by=partition, 5506 order=order, 5507 spec=spec, 5508 alias=window_alias, 5509 over=over, 5510 first=first, 5511 ) 5512 5513 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5514 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5515 return self._parse_window(window, alias=alias) 5516 5517 return window 5518 5519 def _parse_partition_and_order( 5520 self, 5521 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5522 return self._parse_partition_by(), self._parse_order() 5523 5524 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5525 self._match(TokenType.BETWEEN) 5526 5527 return { 5528 "value": ( 5529 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5530 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5531 or self._parse_bitwise() 5532 ), 5533 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5534 } 5535 5536 def _parse_alias( 5537 self, this: t.Optional[exp.Expression], explicit: bool = False 5538 ) -> t.Optional[exp.Expression]: 5539 any_token = self._match(TokenType.ALIAS) 5540 comments = self._prev_comments or [] 5541 5542 if explicit and not any_token: 5543 return this 5544 5545 if self._match(TokenType.L_PAREN): 5546 aliases = self.expression( 5547 exp.Aliases, 5548 comments=comments, 5549 this=this, 5550 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5551 ) 5552 self._match_r_paren(aliases) 5553 return aliases 5554 5555 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5556 self.STRING_ALIASES and self._parse_string_as_identifier() 5557 ) 5558 5559 if alias: 5560 comments.extend(alias.pop_comments()) 5561 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5562 column = this.this 5563 5564 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5565 if not this.comments and column and column.comments: 5566 this.comments = column.pop_comments() 5567 5568 return this 5569 5570 def _parse_id_var( 5571 self, 5572 any_token: bool = True, 5573 tokens: t.Optional[t.Collection[TokenType]] = None, 5574 ) -> t.Optional[exp.Expression]: 5575 expression = self._parse_identifier() 5576 if 
not expression and ( 5577 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5578 ): 5579 quoted = self._prev.token_type == TokenType.STRING 5580 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5581 5582 return expression 5583 5584 def _parse_string(self) -> t.Optional[exp.Expression]: 5585 if self._match_set(self.STRING_PARSERS): 5586 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5587 return self._parse_placeholder() 5588 5589 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5590 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5591 5592 def _parse_number(self) -> t.Optional[exp.Expression]: 5593 if self._match_set(self.NUMERIC_PARSERS): 5594 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5595 return self._parse_placeholder() 5596 5597 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5598 if self._match(TokenType.IDENTIFIER): 5599 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5600 return self._parse_placeholder() 5601 5602 def _parse_var( 5603 self, 5604 any_token: bool = False, 5605 tokens: t.Optional[t.Collection[TokenType]] = None, 5606 upper: bool = False, 5607 ) -> t.Optional[exp.Expression]: 5608 if ( 5609 (any_token and self._advance_any()) 5610 or self._match(TokenType.VAR) 5611 or (self._match_set(tokens) if tokens else False) 5612 ): 5613 return self.expression( 5614 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5615 ) 5616 return self._parse_placeholder() 5617 5618 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5619 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5620 self._advance() 5621 return self._prev 5622 return None 5623 5624 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5625 return self._parse_var() or self._parse_string() 5626 5627 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5628 return self._parse_primary() or self._parse_var(any_token=True) 5629 5630 def _parse_null(self) -> t.Optional[exp.Expression]: 5631 if self._match_set(self.NULL_TOKENS): 5632 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5633 return self._parse_placeholder() 5634 5635 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5636 if self._match(TokenType.TRUE): 5637 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5638 if self._match(TokenType.FALSE): 5639 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5640 return self._parse_placeholder() 5641 5642 def _parse_star(self) -> t.Optional[exp.Expression]: 5643 if self._match(TokenType.STAR): 5644 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5645 return self._parse_placeholder() 5646 5647 def _parse_parameter(self) -> exp.Parameter: 5648 self._match(TokenType.L_BRACE) 5649 this = self._parse_identifier() or self._parse_primary_or_var() 5650 expression = self._match(TokenType.COLON) and ( 5651 self._parse_identifier() or self._parse_primary_or_var() 5652 ) 5653 self._match(TokenType.R_BRACE) 5654 return self.expression(exp.Parameter, this=this, expression=expression) 5655 5656 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5657 if self._match_set(self.PLACEHOLDER_PARSERS): 5658 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5659 if placeholder: 5660 return placeholder 5661 self._advance(-1) 5662 return None 5663 5664 def 
_parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5665 if not self._match(TokenType.EXCEPT): 5666 return None 5667 if self._match(TokenType.L_PAREN, advance=False): 5668 return self._parse_wrapped_csv(self._parse_column) 5669 5670 except_column = self._parse_column() 5671 return [except_column] if except_column else None 5672 5673 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5674 if not self._match(TokenType.REPLACE): 5675 return None 5676 if self._match(TokenType.L_PAREN, advance=False): 5677 return self._parse_wrapped_csv(self._parse_expression) 5678 5679 replace_expression = self._parse_expression() 5680 return [replace_expression] if replace_expression else None 5681 5682 def _parse_csv( 5683 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5684 ) -> t.List[exp.Expression]: 5685 parse_result = parse_method() 5686 items = [parse_result] if parse_result is not None else [] 5687 5688 while self._match(sep): 5689 self._add_comments(parse_result) 5690 parse_result = parse_method() 5691 if parse_result is not None: 5692 items.append(parse_result) 5693 5694 return items 5695 5696 def _parse_tokens( 5697 self, parse_method: t.Callable, expressions: t.Dict 5698 ) -> t.Optional[exp.Expression]: 5699 this = parse_method() 5700 5701 while self._match_set(expressions): 5702 this = self.expression( 5703 expressions[self._prev.token_type], 5704 this=this, 5705 comments=self._prev_comments, 5706 expression=parse_method(), 5707 ) 5708 5709 return this 5710 5711 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5712 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5713 5714 def _parse_wrapped_csv( 5715 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5716 ) -> t.List[exp.Expression]: 5717 return self._parse_wrapped( 5718 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5719 ) 5720 5721 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5722 wrapped = self._match(TokenType.L_PAREN) 5723 if not wrapped and not optional: 5724 self.raise_error("Expecting (") 5725 parse_result = parse_method() 5726 if wrapped: 5727 self._match_r_paren() 5728 return parse_result 5729 5730 def _parse_expressions(self) -> t.List[exp.Expression]: 5731 return self._parse_csv(self._parse_expression) 5732 5733 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5734 return self._parse_select() or self._parse_set_operations( 5735 self._parse_expression() if alias else self._parse_conjunction() 5736 ) 5737 5738 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5739 return self._parse_query_modifiers( 5740 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5741 ) 5742 5743 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5744 this = None 5745 if self._match_texts(self.TRANSACTION_KIND): 5746 this = self._prev.text 5747 5748 self._match_texts(("TRANSACTION", "WORK")) 5749 5750 modes = [] 5751 while True: 5752 mode = [] 5753 while self._match(TokenType.VAR): 5754 mode.append(self._prev.text) 5755 5756 if mode: 5757 modes.append(" ".join(mode)) 5758 if not self._match(TokenType.COMMA): 5759 break 5760 5761 return self.expression(exp.Transaction, this=this, modes=modes) 5762 5763 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5764 chain = None 5765 savepoint = None 5766 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5767 5768 
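# The matching below covers forms such as (illustrative):
#   COMMIT [WORK | TRANSACTION] [AND [NO] CHAIN]  -> exp.Commit(chain=...)
#   ROLLBACK [WORK] [TO [SAVEPOINT] name]         -> exp.Rollback(savepoint=...)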
self._match_texts(("TRANSACTION", "WORK")) 5769 5770 if self._match_text_seq("TO"): 5771 self._match_text_seq("SAVEPOINT") 5772 savepoint = self._parse_id_var() 5773 5774 if self._match(TokenType.AND): 5775 chain = not self._match_text_seq("NO") 5776 self._match_text_seq("CHAIN") 5777 5778 if is_rollback: 5779 return self.expression(exp.Rollback, savepoint=savepoint) 5780 5781 return self.expression(exp.Commit, chain=chain) 5782 5783 def _parse_refresh(self) -> exp.Refresh: 5784 self._match(TokenType.TABLE) 5785 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5786 5787 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5788 if not self._match_text_seq("ADD"): 5789 return None 5790 5791 self._match(TokenType.COLUMN) 5792 exists_column = self._parse_exists(not_=True) 5793 expression = self._parse_field_def() 5794 5795 if expression: 5796 expression.set("exists", exists_column) 5797 5798 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5799 if self._match_texts(("FIRST", "AFTER")): 5800 position = self._prev.text 5801 column_position = self.expression( 5802 exp.ColumnPosition, this=self._parse_column(), position=position 5803 ) 5804 expression.set("position", column_position) 5805 5806 return expression 5807 5808 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5809 drop = self._match(TokenType.DROP) and self._parse_drop() 5810 if drop and not isinstance(drop, exp.Command): 5811 drop.set("kind", drop.args.get("kind", "COLUMN")) 5812 return drop 5813 5814 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5815 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5816 return self.expression( 5817 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5818 ) 5819 5820 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5821 index = self._index - 1 5822 5823 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5824 return self._parse_csv( 5825 lambda: self.expression( 5826 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5827 ) 5828 ) 5829 5830 self._retreat(index) 5831 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5832 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5833 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5834 5835 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 5836 if self._match_texts(self.ALTER_ALTER_PARSERS): 5837 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 5838 5839 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 5840 # keyword after ALTER we default to parsing this statement 5841 self._match(TokenType.COLUMN) 5842 column = self._parse_field(any_token=True) 5843 5844 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5845 return self.expression(exp.AlterColumn, this=column, drop=True) 5846 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5847 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5848 if self._match(TokenType.COMMENT): 5849 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5850 5851 self._match_text_seq("SET", "DATA") 5852 self._match_text_seq("TYPE") 5853 return self.expression( 5854 exp.AlterColumn, 5855 this=column, 5856 dtype=self._parse_types(), 5857 collate=self._match(TokenType.COLLATE) and 
self._parse_term(), 5858 using=self._match(TokenType.USING) and self._parse_conjunction(), 5859 ) 5860 5861 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 5862 if self._match_texts(("ALL", "EVEN", "AUTO")): 5863 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 5864 5865 self._match_text_seq("KEY", "DISTKEY") 5866 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 5867 5868 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 5869 if compound: 5870 self._match_text_seq("SORTKEY") 5871 5872 if self._match(TokenType.L_PAREN, advance=False): 5873 return self.expression( 5874 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 5875 ) 5876 5877 self._match_texts(("AUTO", "NONE")) 5878 return self.expression( 5879 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 5880 ) 5881 5882 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5883 index = self._index - 1 5884 5885 partition_exists = self._parse_exists() 5886 if self._match(TokenType.PARTITION, advance=False): 5887 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5888 5889 self._retreat(index) 5890 return self._parse_csv(self._parse_drop_column) 5891 5892 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5893 if self._match(TokenType.COLUMN): 5894 exists = self._parse_exists() 5895 old_column = self._parse_column() 5896 to = self._match_text_seq("TO") 5897 new_column = self._parse_column() 5898 5899 if old_column is None or to is None or new_column is None: 5900 return None 5901 5902 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5903 5904 self._match_text_seq("TO") 5905 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5906 5907 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5908 start = self._prev 5909 5910 if not self._match(TokenType.TABLE): 5911 return self._parse_as_command(start) 5912 5913 exists = self._parse_exists() 5914 only = self._match_text_seq("ONLY") 5915 this = self._parse_table(schema=True) 5916 5917 if self._next: 5918 self._advance() 5919 5920 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5921 if parser: 5922 actions = ensure_list(parser(self)) 5923 options = self._parse_csv(self._parse_property) 5924 5925 if not self._curr and actions: 5926 return self.expression( 5927 exp.AlterTable, 5928 this=this, 5929 exists=exists, 5930 actions=actions, 5931 only=only, 5932 options=options, 5933 ) 5934 5935 return self._parse_as_command(start) 5936 5937 def _parse_merge(self) -> exp.Merge: 5938 self._match(TokenType.INTO) 5939 target = self._parse_table() 5940 5941 if target and self._match(TokenType.ALIAS, advance=False): 5942 target.set("alias", self._parse_table_alias()) 5943 5944 self._match(TokenType.USING) 5945 using = self._parse_table() 5946 5947 self._match(TokenType.ON) 5948 on = self._parse_conjunction() 5949 5950 return self.expression( 5951 exp.Merge, 5952 this=target, 5953 using=using, 5954 on=on, 5955 expressions=self._parse_when_matched(), 5956 ) 5957 5958 def _parse_when_matched(self) -> t.List[exp.When]: 5959 whens = [] 5960 5961 while self._match(TokenType.WHEN): 5962 matched = not self._match(TokenType.NOT) 5963 self._match_text_seq("MATCHED") 5964 source = ( 5965 False 5966 if self._match_text_seq("BY", "TARGET") 5967 else self._match_text_seq("BY", "SOURCE") 5968 ) 5969 
condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5970 5971 self._match(TokenType.THEN) 5972 5973 if self._match(TokenType.INSERT): 5974 _this = self._parse_star() 5975 if _this: 5976 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5977 else: 5978 then = self.expression( 5979 exp.Insert, 5980 this=self._parse_value(), 5981 expression=self._match_text_seq("VALUES") and self._parse_value(), 5982 ) 5983 elif self._match(TokenType.UPDATE): 5984 expressions = self._parse_star() 5985 if expressions: 5986 then = self.expression(exp.Update, expressions=expressions) 5987 else: 5988 then = self.expression( 5989 exp.Update, 5990 expressions=self._match(TokenType.SET) 5991 and self._parse_csv(self._parse_equality), 5992 ) 5993 elif self._match(TokenType.DELETE): 5994 then = self.expression(exp.Var, this=self._prev.text) 5995 else: 5996 then = None 5997 5998 whens.append( 5999 self.expression( 6000 exp.When, 6001 matched=matched, 6002 source=source, 6003 condition=condition, 6004 then=then, 6005 ) 6006 ) 6007 return whens 6008 6009 def _parse_show(self) -> t.Optional[exp.Expression]: 6010 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6011 if parser: 6012 return parser(self) 6013 return self._parse_as_command(self._prev) 6014 6015 def _parse_set_item_assignment( 6016 self, kind: t.Optional[str] = None 6017 ) -> t.Optional[exp.Expression]: 6018 index = self._index 6019 6020 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6021 return self._parse_set_transaction(global_=kind == "GLOBAL") 6022 6023 left = self._parse_primary() or self._parse_column() 6024 assignment_delimiter = self._match_texts(("=", "TO")) 6025 6026 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6027 self._retreat(index) 6028 return None 6029 6030 right = self._parse_statement() or self._parse_id_var() 6031 this = self.expression(exp.EQ, this=left, expression=right) 6032 6033 return self.expression(exp.SetItem, this=this, kind=kind) 6034 6035 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6036 self._match_text_seq("TRANSACTION") 6037 characteristics = self._parse_csv( 6038 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6039 ) 6040 return self.expression( 6041 exp.SetItem, 6042 expressions=characteristics, 6043 kind="TRANSACTION", 6044 **{"global": global_}, # type: ignore 6045 ) 6046 6047 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6048 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6049 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6050 6051 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6052 index = self._index 6053 set_ = self.expression( 6054 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6055 ) 6056 6057 if self._curr: 6058 self._retreat(index) 6059 return self._parse_as_command(self._prev) 6060 6061 return set_ 6062 6063 def _parse_var_from_options( 6064 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6065 ) -> t.Optional[exp.Var]: 6066 start = self._curr 6067 if not start: 6068 return None 6069 6070 option = start.text.upper() 6071 continuations = options.get(option) 6072 6073 index = self._index 6074 self._advance() 6075 for keywords in continuations or []: 6076 if isinstance(keywords, str): 6077 keywords = (keywords,) 6078 6079 if self._match_text_seq(*keywords): 6080 option = f"{option} {' 
'.join(keywords)}" 6081 break 6082 else: 6083 if continuations or continuations is None: 6084 if raise_unmatched: 6085 self.raise_error(f"Unknown option {option}") 6086 6087 self._retreat(index) 6088 return None 6089 6090 return exp.var(option) 6091 6092 def _parse_as_command(self, start: Token) -> exp.Command: 6093 while self._curr: 6094 self._advance() 6095 text = self._find_sql(start, self._prev) 6096 size = len(start.text) 6097 self._warn_unsupported() 6098 return exp.Command(this=text[:size], expression=text[size:]) 6099 6100 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6101 settings = [] 6102 6103 self._match_l_paren() 6104 kind = self._parse_id_var() 6105 6106 if self._match(TokenType.L_PAREN): 6107 while True: 6108 key = self._parse_id_var() 6109 value = self._parse_primary() 6110 6111 if not key and value is None: 6112 break 6113 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6114 self._match(TokenType.R_PAREN) 6115 6116 self._match_r_paren() 6117 6118 return self.expression( 6119 exp.DictProperty, 6120 this=this, 6121 kind=kind.this if kind else None, 6122 settings=settings, 6123 ) 6124 6125 def _parse_dict_range(self, this: str) -> exp.DictRange: 6126 self._match_l_paren() 6127 has_min = self._match_text_seq("MIN") 6128 if has_min: 6129 min = self._parse_var() or self._parse_primary() 6130 self._match_text_seq("MAX") 6131 max = self._parse_var() or self._parse_primary() 6132 else: 6133 max = self._parse_var() or self._parse_primary() 6134 min = exp.Literal.number(0) 6135 self._match_r_paren() 6136 return self.expression(exp.DictRange, this=this, min=min, max=max) 6137 6138 def _parse_comprehension( 6139 self, this: t.Optional[exp.Expression] 6140 ) -> t.Optional[exp.Comprehension]: 6141 index = self._index 6142 expression = self._parse_column() 6143 if not self._match(TokenType.IN): 6144 self._retreat(index - 1) 6145 return None 6146 iterator = self._parse_column() 6147 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6148 return self.expression( 6149 exp.Comprehension, 6150 this=this, 6151 expression=expression, 6152 iterator=iterator, 6153 condition=condition, 6154 ) 6155 6156 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6157 if self._match(TokenType.HEREDOC_STRING): 6158 return self.expression(exp.Heredoc, this=self._prev.text) 6159 6160 if not self._match_text_seq("$"): 6161 return None 6162 6163 tags = ["$"] 6164 tag_text = None 6165 6166 if self._is_connected(): 6167 self._advance() 6168 tags.append(self._prev.text.upper()) 6169 else: 6170 self.raise_error("No closing $ found") 6171 6172 if tags[-1] != "$": 6173 if self._is_connected() and self._match_text_seq("$"): 6174 tag_text = tags[-1] 6175 tags.append("$") 6176 else: 6177 self.raise_error("No closing $ found") 6178 6179 heredoc_start = self._curr 6180 6181 while self._curr: 6182 if self._match_text_seq(*tags, advance=False): 6183 this = self._find_sql(heredoc_start, self._prev) 6184 self._advance(len(tags)) 6185 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6186 6187 self._advance() 6188 6189 self.raise_error(f"No closing {''.join(tags)} found") 6190 return None 6191 6192 def _find_parser( 6193 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6194 ) -> t.Optional[t.Callable]: 6195 if not self._curr: 6196 return None 6197 6198 index = self._index 6199 this = [] 6200 while True: 6201 # The current token might be multiple words 6202 curr = self._curr.text.upper() 6203 key = curr.split(" ") 6204 this.append(curr) 6205 
6206 self._advance() 6207 result, trie = in_trie(trie, key) 6208 if result == TrieResult.FAILED: 6209 break 6210 6211 if result == TrieResult.EXISTS: 6212 subparser = parsers[" ".join(this)] 6213 return subparser 6214 6215 self._retreat(index) 6216 return None 6217 6218 def _match(self, token_type, advance=True, expression=None): 6219 if not self._curr: 6220 return None 6221 6222 if self._curr.token_type == token_type: 6223 if advance: 6224 self._advance() 6225 self._add_comments(expression) 6226 return True 6227 6228 return None 6229 6230 def _match_set(self, types, advance=True): 6231 if not self._curr: 6232 return None 6233 6234 if self._curr.token_type in types: 6235 if advance: 6236 self._advance() 6237 return True 6238 6239 return None 6240 6241 def _match_pair(self, token_type_a, token_type_b, advance=True): 6242 if not self._curr or not self._next: 6243 return None 6244 6245 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6246 if advance: 6247 self._advance(2) 6248 return True 6249 6250 return None 6251 6252 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6253 if not self._match(TokenType.L_PAREN, expression=expression): 6254 self.raise_error("Expecting (") 6255 6256 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6257 if not self._match(TokenType.R_PAREN, expression=expression): 6258 self.raise_error("Expecting )") 6259 6260 def _match_texts(self, texts, advance=True): 6261 if self._curr and self._curr.text.upper() in texts: 6262 if advance: 6263 self._advance() 6264 return True 6265 return None 6266 6267 def _match_text_seq(self, *texts, advance=True): 6268 index = self._index 6269 for text in texts: 6270 if self._curr and self._curr.text.upper() == text: 6271 self._advance() 6272 else: 6273 self._retreat(index) 6274 return None 6275 6276 if not advance: 6277 self._retreat(index) 6278 6279 return True 6280 6281 def _replace_lambda( 6282 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6283 ) -> t.Optional[exp.Expression]: 6284 if not node: 6285 return node 6286 6287 for column in node.find_all(exp.Column): 6288 if column.parts[0].name in lambda_variables: 6289 dot_or_id = column.to_dot() if column.table else column.this 6290 parent = column.parent 6291 6292 while isinstance(parent, exp.Dot): 6293 if not isinstance(parent.parent, exp.Dot): 6294 parent.replace(dot_or_id) 6295 break 6296 parent = parent.parent 6297 else: 6298 if column is node: 6299 node = dot_or_id 6300 else: 6301 column.replace(dot_or_id) 6302 return node 6303 6304 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6305 start = self._prev 6306 6307 # Not to be confused with TRUNCATE(number, decimals) function call 6308 if self._match(TokenType.L_PAREN): 6309 self._retreat(self._index - 2) 6310 return self._parse_function() 6311 6312 # Clickhouse supports TRUNCATE DATABASE as well 6313 is_database = self._match(TokenType.DATABASE) 6314 6315 self._match(TokenType.TABLE) 6316 6317 exists = self._parse_exists(not_=False) 6318 6319 expressions = self._parse_csv( 6320 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6321 ) 6322 6323 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6324 6325 if self._match_text_seq("RESTART", "IDENTITY"): 6326 identity = "RESTART" 6327 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6328 identity = "CONTINUE" 6329 else: 6330 identity = None 6331 6332 if self._match_text_seq("CASCADE") or 
self._match_text_seq("RESTRICT"): 6333 option = self._prev.text 6334 else: 6335 option = None 6336 6337 partition = self._parse_partition() 6338 6339 # Fallback case 6340 if self._curr: 6341 return self._parse_as_command(start) 6342 6343 return self.expression( 6344 exp.TruncateTable, 6345 expressions=expressions, 6346 is_database=is_database, 6347 exists=exists, 6348 cluster=cluster, 6349 identity=identity, 6350 option=option, 6351 partition=partition, 6352 ) 6353 6354 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6355 this = self._parse_ordered(self._parse_opclass) 6356 6357 if not self._match(TokenType.WITH): 6358 return this 6359 6360 op = self._parse_var(any_token=True) 6361 6362 return self.expression(exp.WithOperator, this=this, op=op) 6363 6364 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6365 opts = [] 6366 self._match(TokenType.EQ) 6367 self._match(TokenType.L_PAREN) 6368 while self._curr and not self._match(TokenType.R_PAREN): 6369 opts.append(self._parse_conjunction()) 6370 self._match(TokenType.COMMA) 6371 return opts 6372 6373 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6374 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 6375 6376 options = [] 6377 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6378 option = self._parse_unquoted_field() 6379 value = None 6380 6381 # Some options are defined as functions with the values as params 6382 if not isinstance(option, exp.Func): 6383 prev = self._prev.text.upper() 6384 # Different dialects might separate options and values by white space, "=" and "AS" 6385 self._match(TokenType.EQ) 6386 self._match(TokenType.ALIAS) 6387 6388 if prev == "FILE_FORMAT" and self._match(TokenType.L_PAREN): 6389 # Snowflake FILE_FORMAT case 6390 value = self._parse_wrapped_options() 6391 else: 6392 value = self._parse_unquoted_field() 6393 6394 param = self.expression(exp.CopyParameter, this=option, expression=value) 6395 options.append(param) 6396 6397 if sep: 6398 self._match(sep) 6399 6400 return options 6401 6402 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6403 expr = self.expression(exp.Credentials) 6404 6405 if self._match_text_seq("STORAGE_INTEGRATION", advance=False): 6406 expr.set("storage", self._parse_conjunction()) 6407 if self._match_text_seq("CREDENTIALS"): 6408 # Snowflake supports CREDENTIALS = (...), while Redshift CREDENTIALS <string> 6409 creds = ( 6410 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6411 ) 6412 expr.set("credentials", creds) 6413 if self._match_text_seq("ENCRYPTION"): 6414 expr.set("encryption", self._parse_wrapped_options()) 6415 if self._match_text_seq("IAM_ROLE"): 6416 expr.set("iam_role", self._parse_field()) 6417 if self._match_text_seq("REGION"): 6418 expr.set("region", self._parse_field()) 6419 6420 return expr 6421 6422 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6423 return self._parse_field() 6424 6425 def _parse_copy(self) -> exp.Copy | exp.Command: 6426 start = self._prev 6427 6428 self._match(TokenType.INTO) 6429 6430 this = ( 6431 self._parse_conjunction() 6432 if self._match(TokenType.L_PAREN, advance=False) 6433 else self._parse_table(schema=True) 6434 ) 6435 6436 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6437 6438 files = self._parse_csv(self._parse_file_location) 6439 credentials = self._parse_credentials() 6440 6441 self._match_text_seq("WITH") 6442 6443 params = 
self._parse_wrapped(self._parse_copy_parameters, optional=True) 6444 6445 # Fallback case 6446 if self._curr: 6447 return self._parse_as_command(start) 6448 6449 return self.expression( 6450 exp.Copy, 6451 this=this, 6452 kind=kind, 6453 credentials=credentials, 6454 files=files, 6455 params=params, 6456 )
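These private _parse_* helpers are normally reached through the public entry points rather than called directly. A minimal sketch of how a few of the statement parsers above surface via sqlglot.parse_one, assuming a sqlglot release matching this listing (newer releases may rename some nodes, e.g. exp.AlterTable):

    import sqlglot
    from sqlglot import exp

    # MERGE is routed to _parse_merge, which builds the WHEN clauses
    # via _parse_when_matched.
    merge = sqlglot.parse_one(
        "MERGE INTO t USING s ON t.id = s.id "
        "WHEN MATCHED THEN UPDATE SET t.v = s.v "
        "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
    )
    assert isinstance(merge, exp.Merge)

    # ALTER TABLE dispatches through ALTER_PARSERS (_parse_alter_table_add here).
    alter = sqlglot.parse_one("ALTER TABLE t ADD COLUMN c INT")
    assert isinstance(alter, exp.AlterTable)

    # TRUNCATE is handled by _parse_truncate_table, including RESTART IDENTITY.
    truncate = sqlglot.parse_one("TRUNCATE TABLE t1, t2 RESTART IDENTITY")
    assert isinstance(truncate, exp.TruncateTable)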
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
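A Parser is rarely constructed by hand, since sqlglot.parse and sqlglot.parse_one pick the dialect-specific subclass for you, but direct construction works. A short sketch:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    # Collect up to 5 errors and raise them together at the end of parsing,
    # instead of raising on the first one (the IMMEDIATE default).
    p = Parser(error_level=ErrorLevel.RAISE, max_errors=5, dialect="duckdb")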
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
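parse pairs naturally with the Tokenizer. A minimal sketch using the default (dialect-agnostic) classes:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t; SELECT b FROM u"
    tokens = Tokenizer().tokenize(sql)

    # One syntax tree per semicolon-separated statement; passing sql= gives
    # raise_error the context it needs for readable messages.
    trees = Parser().parse(tokens, sql=sql)
    assert len(trees) == 2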
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
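As a sketch, parse_into can lift a bare SQL fragment into one of the Expression types registered in EXPRESSION_PARSERS (exp.Where is assumed to be one of them here):

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "WHERE x > 1"
    # Parse a fragment directly into exp.Where instead of a full statement.
    where = Parser().parse_into(exp.Where, Tokenizer().tokenize(sql), sql=sql)[0]
    assert isinstance(where, exp.Where)

Passing a tuple such as (exp.From, exp.Where) tries each type in order and raises a merged ParseError only if all of them fail.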
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
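A sketch of the WARN behavior, assuming "SELECT 1 +" fails validation because the addition is missing its right operand:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1 +"  # incomplete expression
    p = Parser(error_level=ErrorLevel.WARN)
    p.parse(Tokenizer().tokenize(sql), sql=sql)

    # Under WARN, check_errors (invoked at the end of _parse) logs instead of
    # raising; the recorded ParseError objects remain inspectable on p.errors.
    assert p.errors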
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
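Besides the rendered message (with the offending span underlined via ANSI escapes), raise_error records structured context on the ParseError. A sketch, assuming "SELECT * FROM" fails because the table name is missing:

    import sqlglot
    from sqlglot.errors import ParseError

    try:
        sqlglot.parse_one("SELECT * FROM")
    except ParseError as e:
        info = e.errors[0]  # the dict populated by ParseError.new
        print(info["line"], info["col"], info["highlight"])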
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
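Internally this is how every node in the tree gets built. A minimal sketch of calling it on a standalone Parser:

    from sqlglot import exp
    from sqlglot.parser import Parser

    p = Parser()
    # Build and validate an equality node; with no pending comments and the
    # default error level, this simply returns the validated instance.
    node = p.expression(exp.EQ, this=exp.column("x"), expression=exp.Literal.number(1))
    assert node.sql() == "x = 1"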
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
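A sketch of a validation failure, assuming exp.Limit declares its "expression" argument as mandatory:

    from sqlglot import exp
    from sqlglot.errors import ParseError
    from sqlglot.parser import Parser

    try:
        # The missing mandatory arg makes error_messages non-empty, and the
        # default IMMEDIATE error level turns that into a raised ParseError.
        Parser().validate_expression(exp.Limit())
    except ParseError as e:
        print(e.errors[0]["description"])

With error_level set to ErrorLevel.IGNORE, validation is skipped entirely and the (possibly incomplete) expression is returned as-is.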