sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)

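# Illustrative sketch (not part of the original source): the builders above
# turn raw argument lists into AST nodes. Assuming only the public sqlglot
# API, build_mod's parenthesization of binary operands shows up in a round trip:
#
#   import sqlglot
#   sqlglot.parse_one("SELECT MOD(a + 1, 7)").sql()  # -> 'SELECT (a + 1) % 7'
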
class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "MOD": build_mod,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": build_var_map,
    }

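    # Illustrative sketch (not part of the original source): FUNCTIONS maps an
    # upper-cased SQL function name to a builder that receives the parsed
    # argument list. A subclass could register a custom builder; the "FOO"
    # function below is hypothetical:
    #
    #   class MyParser(Parser):
    #       FUNCTIONS = {
    #           **Parser.FUNCTIONS,
    #           "FOO": lambda args: exp.Ln(this=seq_get(args, 0)),
    #       }
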
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.MODEL,
        TokenType.DICTIONARY,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.FOREIGN_KEY,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.COLON_EQ: exp.PropertyEQ,
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

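    # Illustrative note (not part of the original source): the operator tables
    # above drive precedence climbing. Roughly, each level parses the
    # next-higher level as its operands:
    #
    #   _parse_conjunction          CONJUNCTION  (AND, OR)
    #     -> _parse_equality        EQUALITY     (=, !=, <=>, :=)
    #       -> _parse_comparison    COMPARISON   (<, <=, >, >=)
    #         -> _parse_bitwise     BITWISE      (&, ^, |)
    #           -> _parse_term      TERM         (+, -, %, COLLATE)
    #             -> _parse_factor  FACTOR       (*, /, DIV, <->)
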
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

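    # Illustrative sketch (not part of the original source): COLUMN_OPERATORS
    # handles postfix operators on an already-parsed column, assuming only the
    # public sqlglot API:
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT x::INT")      # '::' -> exp.Cast (STRICT_CAST)
    #   sqlglot.parse_one("SELECT j -> '$.k'")  # '->' -> exp.JSONExtract
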
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
    }

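    # Illustrative note (not part of the original source): _parse_statement()
    # (defined further down) dispatches on the current token through
    # STATEMENT_PARSERS, so a leading CREATE routes to _parse_create(), DROP to
    # _parse_drop(), and anything unmatched falls through to expression parsing.
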
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

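    # Illustrative note (not part of the original source): RANGE_PARSERS extend
    # an already-parsed left-hand side, e.g. `x BETWEEN 1 AND 2` or `x IN (1, 2)`.
    # The simple binary cases are built by the module-level binary_range_parser(),
    # which also picks up a trailing ESCAPE clause via _parse_escape().
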
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_conjunction),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

"LIKE", 929 "PERIOD", 930 "PRIMARY KEY", 931 "UNIQUE", 932 } 933 934 NO_PAREN_FUNCTION_PARSERS = { 935 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 936 "CASE": lambda self: self._parse_case(), 937 "IF": lambda self: self._parse_if(), 938 "NEXT": lambda self: self._parse_next_value_for(), 939 } 940 941 INVALID_FUNC_NAME_TOKENS = { 942 TokenType.IDENTIFIER, 943 TokenType.STRING, 944 } 945 946 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 947 948 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 949 950 FUNCTION_PARSERS = { 951 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 952 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 953 "DECODE": lambda self: self._parse_decode(), 954 "EXTRACT": lambda self: self._parse_extract(), 955 "JSON_OBJECT": lambda self: self._parse_json_object(), 956 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 957 "JSON_TABLE": lambda self: self._parse_json_table(), 958 "MATCH": lambda self: self._parse_match_against(), 959 "OPENJSON": lambda self: self._parse_open_json(), 960 "POSITION": lambda self: self._parse_position(), 961 "PREDICT": lambda self: self._parse_predict(), 962 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 963 "STRING_AGG": lambda self: self._parse_string_agg(), 964 "SUBSTRING": lambda self: self._parse_substring(), 965 "TRIM": lambda self: self._parse_trim(), 966 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 967 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 968 } 969 970 QUERY_MODIFIER_PARSERS = { 971 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 972 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 973 TokenType.WHERE: lambda self: ("where", self._parse_where()), 974 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 975 TokenType.HAVING: lambda self: ("having", self._parse_having()), 976 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 977 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 978 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 979 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 980 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 981 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 982 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 983 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 984 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 985 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 986 TokenType.CLUSTER_BY: lambda self: ( 987 "cluster", 988 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 989 ), 990 TokenType.DISTRIBUTE_BY: lambda self: ( 991 "distribute", 992 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 993 ), 994 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 995 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 996 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 997 } 998 999 SET_PARSERS = { 1000 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1001 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1002 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1003 "TRANSACTION": lambda self: self._parse_set_transaction(), 1004 } 1005 1006 SHOW_PARSERS: 
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

"=") for assignments 1107 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1108 1109 # Whether the TRIM function expects the characters to trim as its first argument 1110 TRIM_PATTERN_FIRST = False 1111 1112 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1113 STRING_ALIASES = False 1114 1115 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1116 MODIFIERS_ATTACHED_TO_UNION = True 1117 UNION_MODIFIERS = {"order", "limit", "offset"} 1118 1119 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1120 NO_PAREN_IF_COMMANDS = True 1121 1122 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1123 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1124 1125 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1126 # If this is True and '(' is not found, the keyword will be treated as an identifier 1127 VALUES_FOLLOWED_BY_PAREN = True 1128 1129 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1130 SUPPORTS_IMPLICIT_UNNEST = False 1131 1132 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1133 INTERVAL_SPANS = True 1134 1135 # Whether a PARTITION clause can follow a table reference 1136 SUPPORTS_PARTITION_SELECTION = False 1137 1138 __slots__ = ( 1139 "error_level", 1140 "error_message_context", 1141 "max_errors", 1142 "dialect", 1143 "sql", 1144 "errors", 1145 "_tokens", 1146 "_index", 1147 "_curr", 1148 "_next", 1149 "_prev", 1150 "_prev_comments", 1151 ) 1152 1153 # Autofilled 1154 SHOW_TRIE: t.Dict = {} 1155 SET_TRIE: t.Dict = {} 1156 1157 def __init__( 1158 self, 1159 error_level: t.Optional[ErrorLevel] = None, 1160 error_message_context: int = 100, 1161 max_errors: int = 3, 1162 dialect: DialectType = None, 1163 ): 1164 from sqlglot.dialects import Dialect 1165 1166 self.error_level = error_level or ErrorLevel.IMMEDIATE 1167 self.error_message_context = error_message_context 1168 self.max_errors = max_errors 1169 self.dialect = Dialect.get_or_raise(dialect) 1170 self.reset() 1171 1172 def reset(self): 1173 self.sql = "" 1174 self.errors = [] 1175 self._tokens = [] 1176 self._index = 0 1177 self._curr = None 1178 self._next = None 1179 self._prev = None 1180 self._prev_comments = None 1181 1182 def parse( 1183 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1184 ) -> t.List[t.Optional[exp.Expression]]: 1185 """ 1186 Parses a list of tokens and returns a list of syntax trees, one tree 1187 per parsed SQL statement. 1188 1189 Args: 1190 raw_tokens: The list of tokens. 1191 sql: The original SQL string, used to produce helpful debug messages. 1192 1193 Returns: 1194 The list of the produced syntax trees. 1195 """ 1196 return self._parse( 1197 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1198 ) 1199 1200 def parse_into( 1201 self, 1202 expression_types: exp.IntoType, 1203 raw_tokens: t.List[Token], 1204 sql: t.Optional[str] = None, 1205 ) -> t.List[t.Optional[exp.Expression]]: 1206 """ 1207 Parses a list of tokens into a given Expression type. If a collection of Expression 1208 types is given instead, this method will try to parse the token list into each one 1209 of them, stopping at the first for which the parsing succeeds. 1210 1211 Args: 1212 expression_types: The expression type(s) to try and parse the token list into. 1213 raw_tokens: The list of tokens. 1214 sql: The original SQL string, used to produce helpful debug messages. 
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

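    # Illustrative usage sketch (not part of the original source): parse_into()
    # targets one of the EXPRESSION_PARSERS keys, e.g. a bare condition:
    #
    #   from sqlglot import exp, parser, tokens
    #
    #   sql = "x > 1 AND y < 2"
    #   [cond] = parser.Parser().parse_into(
    #       exp.Condition, tokens.Tokenizer().tokenize(sql), sql
    #   )
    #   assert isinstance(cond, exp.And)
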
1326 """ 1327 instance = exp_class(**kwargs) 1328 instance.add_comments(comments) if comments else self._add_comments(instance) 1329 return self.validate_expression(instance) 1330 1331 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1332 if expression and self._prev_comments: 1333 expression.add_comments(self._prev_comments) 1334 self._prev_comments = None 1335 1336 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1337 """ 1338 Validates an Expression, making sure that all its mandatory arguments are set. 1339 1340 Args: 1341 expression: The expression to validate. 1342 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1343 1344 Returns: 1345 The validated expression. 1346 """ 1347 if self.error_level != ErrorLevel.IGNORE: 1348 for error_message in expression.error_messages(args): 1349 self.raise_error(error_message) 1350 1351 return expression 1352 1353 def _find_sql(self, start: Token, end: Token) -> str: 1354 return self.sql[start.start : end.end + 1] 1355 1356 def _is_connected(self) -> bool: 1357 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1358 1359 def _advance(self, times: int = 1) -> None: 1360 self._index += times 1361 self._curr = seq_get(self._tokens, self._index) 1362 self._next = seq_get(self._tokens, self._index + 1) 1363 1364 if self._index > 0: 1365 self._prev = self._tokens[self._index - 1] 1366 self._prev_comments = self._prev.comments 1367 else: 1368 self._prev = None 1369 self._prev_comments = None 1370 1371 def _retreat(self, index: int) -> None: 1372 if index != self._index: 1373 self._advance(index - self._index) 1374 1375 def _warn_unsupported(self) -> None: 1376 if len(self._tokens) <= 1: 1377 return 1378 1379 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1380 # interested in emitting a warning for the one being currently processed. 1381 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1382 1383 logger.warning( 1384 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1385 ) 1386 1387 def _parse_command(self) -> exp.Command: 1388 self._warn_unsupported() 1389 return self.expression( 1390 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1391 ) 1392 1393 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1394 """ 1395 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an
        error. This behavior can be different depending on the user-set ErrorLevel, so _try_parse
        aims to solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind.upper(),
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

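    # Illustrative note (not part of the original source): the
    # exp.Properties.Location comments in _parse_create() mark the syntactic
    # slots where properties may appear (POST_CREATE, POST_NAME, POST_SCHEMA,
    # POST_ALIAS, POST_EXPRESSION, POST_INDEX); extend_props() folds whatever
    # is found at each slot into a single exp.Properties node.
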
    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

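    # Illustrative sketch (not part of the original source): for a statement
    # such as
    #
    #   CREATE SEQUENCE s INCREMENT BY 2 MINVALUE 0 MAXVALUE 100 CACHE 10
    #
    # _parse_sequence_properties() fills exp.SequenceProperties with
    # increment=2, minvalue=0, maxvalue=100 and cache=10, while unmatched
    # keywords either resolve via the CREATE_SEQUENCE options or end the loop.
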

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_bitwise() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self):
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)
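
    # Illustrative usage sketch (assumes sqlglot.parse_one; version-dependent):
    #
    #   seq = sqlglot.parse_one("CREATE SEQUENCE s INCREMENT BY 2 START WITH 10")
    #   # _parse_property() finds no "=" after the first identifier, retreats, and
    #   # falls through to _parse_sequence_properties(), which sets the
    #   # "increment" and "start" args on an exp.SequenceProperties node.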

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))
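
    # Illustrative usage sketch (assumes sqlglot.parse_one; version-dependent):
    #
    #   ct = sqlglot.parse_one("CREATE TABLE t (a INT) WITH (fillfactor = 70)")
    #   # _parse_wrapped_properties() reads each key = value pair, so
    #   # ct.args["properties"] is an exp.Properties holding one exp.Property.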

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )
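
    # Illustrative usage sketch (assumes sqlglot.parse_one with read="postgres"):
    #
    #   sql = "CREATE TABLE p1 PARTITION OF t FOR VALUES FROM (1) TO (10)"
    #   part = sqlglot.parse_one(sql, read="postgres")
    #   # _parse_partitioned_of() plus _parse_partition_bound_spec() produce a
    #   # PartitionedOfProperty whose PartitionBoundSpec carries the FROM/TO lists.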

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            style = None
            self._retreat(self._index - 2)
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, style=style, kind=kind, expressions=expressions
        )

    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )
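
    # Illustrative usage sketch (assumes sqlglot.parse_one; version-dependent):
    #
    #   sql = "INSERT INTO t (a) VALUES (1) ON CONFLICT (a) DO NOTHING"
    #   ins = sqlglot.parse_one(sql)
    #   # ins.args["conflict"] is an exp.OnConflict; the wrapped column list was
    #   # captured as conflict_keys and the action comes from CONFLICT_ACTIONS.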

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_properties()
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )
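
    # Illustrative usage sketch (assumes sqlglot.parse_one with read="mysql"):
    #
    #   sql = "DELETE t1 FROM t1 JOIN t2 ON t1.id = t2.id WHERE t2.x > 0"
    #   del_ = sqlglot.parse_one(sql, read="mysql")
    #   # The leading table list lands in del_.args["tables"], covering MySQL's
    #   # multiple-table DELETE syntax referenced above.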

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> t.Optional[exp.Tuple]:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None
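
    # Illustrative usage sketch (assumes sqlglot.parse_one; version-dependent):
    #
    #   vals = sqlglot.parse_one("SELECT * FROM (VALUES (1, 2), (3, 4)) AS t(a, b)")
    #   # Each parenthesized row becomes an exp.Tuple via _parse_value(), and
    #   # _parse_derived_table_values() collects them under an exp.Values node.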

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )
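
    # Illustrative usage sketch (assumes sqlglot.parse_one; version-dependent):
    #
    #   q = sqlglot.parse_one("WITH x AS (SELECT 1 AS a) SELECT a FROM x")
    #   # _parse_select() parses the WITH clause first and then attaches it to
    #   # the statement that follows, so q is the Select and q.args["with"]
    #   # holds a single exp.CTE.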

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import (
            normalize_identifiers as _norm,
        )

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

            if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args:
                this = self._implicit_unnests_to_explicit(this)

        return this
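
    # Illustrative usage sketch (assumes sqlglot.parse_one with read="mysql"):
    #
    #   q = sqlglot.parse_one("SELECT * FROM t LIMIT 5, 10", read="mysql")
    #   # _parse_limit() stores the leading 5 in the limit's "offset" arg; the
    #   # modifier loop above then pops it out into a standalone exp.Offset.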

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_function() or self._parse_var(upper=True)
                ),
                [],
            ):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )
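
    # Illustrative usage sketch (assumes sqlglot.parse_one with read="oracle";
    # hint tokenization is dialect-specific, so treat this as approximate):
    #
    #   q = sqlglot.parse_one("SELECT /*+ FULL(t) */ * FROM t", read="oracle")
    #   # _parse_hint() collects FULL(t) until the closing */ token pair and
    #   # stores it as q.args["hint"], an exp.Hint.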

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not isinstance(kwargs["this"], exp.Unnest) and not (
            kind and kind.token_type == TokenType.CROSS
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_conjunction()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )
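
    # Illustrative usage sketch (assumes sqlglot.parse_one; version-dependent):
    #
    #   q = sqlglot.parse_one("SELECT * FROM a LEFT OUTER JOIN b ON a.id = b.id")
    #   join = q.args["joins"][0]
    #   # _parse_join_parts() matched the side (LEFT) and kind (OUTER), and the
    #   # ON branch stored the condition under join.args["on"].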

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        return self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
            pivots=self._parse_pivots(),
        )
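
    # Illustrative usage sketch (assumes sqlglot.parse_one; version-dependent):
    #
    #   tbl = sqlglot.parse_one("SELECT * FROM c.d.t").find(exp.Table)
    #   # _parse_table_parts() unwinds the dotted chain, so tbl.catalog, tbl.db
    #   # and tbl.name are "c", "d" and "t" respectively.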

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)
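
    # Illustrative usage sketch (hedged: snapshot tokenization varies by dialect):
    #
    #   sql = "SELECT * FROM t FOR SYSTEM_TIME AS OF '2020-01-01'"
    #   # Dialects that tokenize the snapshot keyword as TIMESTAMP_SNAPSHOT reach
    #   # _parse_version(), yielding exp.Version(this="TIMESTAMP", kind="AS OF").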

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match_text_seq("VALUES"):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)
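
    # Illustrative usage sketch (assumes sqlglot.parse_one with read="presto"):
    #
    #   sql = "SELECT * FROM UNNEST(ARRAY[1, 2]) AS u(x)"
    #   q = sqlglot.parse_one(sql, read="presto")
    #   # _parse_unnest() wraps the array in exp.Unnest; the alias column x stays
    #   # on the TableAlias (UNNEST_COLUMN_ONLY dialects move it instead).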

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_conjunction()

            self._match(TokenType.ALIAS)
            alias = self._parse_field()
            if alias:
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        aliased_expressions = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=aliased_expressions)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )
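
    # Illustrative usage sketch (assumes sqlglot.parse_one with read="snowflake"):
    #
    #   sql = "SELECT * FROM t PIVOT (SUM(v) FOR k IN ('a', 'b'))"
    #   piv = sqlglot.parse_one(sql, read="snowflake").find(exp.Pivot)
    #   # _parse_pivot_in() returns the exp.In for k, and the aggregation list
    #   # feeds _pivot_column_names() when the output columns are synthesized.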
    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> exp.Alias:
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )
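    # Illustrative sketch (not part of the original source): the INTERPOLATE and
    # WITH FILL handling below targets ClickHouse's ORDER BY extension, e.g.
    #
    #   SELECT n FROM t ORDER BY n WITH FILL FROM 1 TO 10 STEP 1 INTERPOLATE (x AS x + 1)
    #
    # where _parse_ordered attaches an exp.WithFill and _parse_interpolate collects
    # the (column AS expression) pairs.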
    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_conjunction()
        if not this:
            return None

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )
    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            expression = this.expression

            if expression:
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this
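    # Illustrative sketch (not part of the original source): a DuckDB-style
    #
    #   SELECT * FROM a UNION ALL BY NAME SELECT * FROM b
    #
    # is handled by _parse_set_operations above, which records distinct=False
    # (since ALL was matched) and by_name=True on the resulting exp.Union.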
    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())
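    # Illustrative sketch (not part of the original source): _parse_interval below
    # canonicalizes single-string intervals, so a query like
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT INTERVAL '5 day'").sql()
    #
    # should come back as SELECT INTERVAL '5' DAY, with the unit split out of the
    # string so it is easy to transpile.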
    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            this = self.expression(
                self.FACTOR[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )
            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())
    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
            while True:
                index = self._index
                self._match(TokenType.PLUS)

                if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
                    self._retreat(index)
                    break

                interval = self.expression(  # type: ignore
                    exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
                )

            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )
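    # Illustrative sketch (not part of the original source): _parse_types below
    # accepts both parenthesized and angle-bracketed nested types, e.g.
    #
    #   import sqlglot
    #   sqlglot.parse_one("CAST(x AS MAP<STRING, ARRAY<INT>>)", read="spark")
    #
    # should build a nested exp.DataType tree (a MAP whose expressions are the
    # STRING key type and the ARRAY<INT> value type).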
    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if identifier:
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        index = self._index
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        column_def = self._parse_column_def(this)

        if type_required and (
            (isinstance(this, exp.Column) and this.this is column_def) or this is column_def
        ):
            self._retreat(index)
            return self._parse_types()

        return column_def
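    # Illustrative sketch (not part of the original source): _parse_at_time_zone
    # below wraps an already-parsed operand, so e.g.
    #
    #   ts AT TIME ZONE 'UTC'
    #
    # becomes exp.AtTimeZone(this=ts, zone='UTC'); the zone itself goes through
    # _parse_unary, so signs and casts on the zone expression still parse.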
    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_column_reference()
        return self._parse_column_ops(this) if this else self._parse_bracket(this)

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, exp.Func) and this:
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this
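    # Illustrative sketch (not part of the original source): the replace_tree call
    # in _parse_column_ops above handles BigQuery's dotted function calls, e.g.
    #
    #   SELECT SAFE.SUBSTR('foo', 0, -2)
    #
    # where the "column" SAFE is folded into an exp.Dot around the parsed function
    # instead of being kept as a table/column reference.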
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func
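    # Illustrative sketch (not part of the original source): the {fn ...} escape
    # handled above lets ODBC-style queries parse, e.g.
    #
    #   SELECT {fn CONCAT('a', 'b')}
    #
    # is treated like SELECT CONCAT('a', 'b'), with the braces and FN marker
    # consumed around the inner function call.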
    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        transformed = []

        for e in expressions:
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)
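    # Illustrative sketch (not part of the original source): _parse_lambda below
    # first tries a lambda head and falls back to a regular expression, e.g. in
    # Spark-flavored SQL
    #
    #   SELECT TRANSFORM(xs, x -> x + 1)
    #
    # the second argument is routed through self.LAMBDAS once -> is seen, while
    # SELECT MAX(x) simply falls through to _parse_select_or_expression.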
    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=persisted or self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
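    # Illustrative sketch (not part of the original source): the method below
    # covers both flavors of generated columns, e.g.
    #
    #   id INT GENERATED BY DEFAULT AS IDENTITY (START WITH 1 INCREMENT BY 2)
    #
    # sets this=False (BY DEFAULT) plus start/increment, while
    # GENERATED ALWAYS AS (a + b) lands in the non-identity branch via _parse_range.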
    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints
    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()
    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)
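    # Illustrative sketch (not part of the original source): per the DuckDB link in
    # _parse_bracket above, a brace literal such as
    #
    #   SELECT {'a': 1, 'b': 2}
    #
    # is parsed as an exp.Struct whose key-value pairs are normalized through
    # _kv_to_prop_eq, whereas [1, 2] becomes an exp.Array.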
    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )
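    # Illustrative sketch (not part of the original source): the FORMAT branch in
    # _parse_cast rewrites a Teradata-style
    #
    #   CAST(x AS DATE FORMAT 'YYYY-MM-DD')
    #
    # into exp.StrToDate, with the format string translated through the dialect's
    # FORMAT_MAPPING, rather than keeping a plain exp.Cast.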
    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)
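    # Illustrative sketch (not part of the original source): per the docstring of
    # _parse_decode below, the Oracle-style variant is expanded into a CASE, e.g.
    #
    #   DECODE(x, 1, 'one', 2, 'two', 'other')
    #
    # becomes CASE WHEN x = 1 THEN 'one' WHEN x = 2 THEN 'two' ELSE 'other' END,
    # with NULL searches compared via IS NULL instead of equality.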
5124 """ 5125 args = self._parse_csv(self._parse_conjunction) 5126 5127 if len(args) < 3: 5128 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5129 5130 expression, *expressions = args 5131 if not expression: 5132 return None 5133 5134 ifs = [] 5135 for search, result in zip(expressions[::2], expressions[1::2]): 5136 if not search or not result: 5137 return None 5138 5139 if isinstance(search, exp.Literal): 5140 ifs.append( 5141 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5142 ) 5143 elif isinstance(search, exp.Null): 5144 ifs.append( 5145 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5146 ) 5147 else: 5148 cond = exp.or_( 5149 exp.EQ(this=expression.copy(), expression=search), 5150 exp.and_( 5151 exp.Is(this=expression.copy(), expression=exp.Null()), 5152 exp.Is(this=search.copy(), expression=exp.Null()), 5153 copy=False, 5154 ), 5155 copy=False, 5156 ) 5157 ifs.append(exp.If(this=cond, true=result)) 5158 5159 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5160 5161 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5162 self._match_text_seq("KEY") 5163 key = self._parse_column() 5164 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5165 self._match_text_seq("VALUE") 5166 value = self._parse_bitwise() 5167 5168 if not key and not value: 5169 return None 5170 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5171 5172 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5173 if not this or not self._match_text_seq("FORMAT", "JSON"): 5174 return this 5175 5176 return self.expression(exp.FormatJson, this=this) 5177 5178 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5179 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5180 for value in values: 5181 if self._match_text_seq(value, "ON", on): 5182 return f"{value} ON {on}" 5183 5184 return None 5185 5186 @t.overload 5187 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5188 5189 @t.overload 5190 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5191 5192 def _parse_json_object(self, agg=False): 5193 star = self._parse_star() 5194 expressions = ( 5195 [star] 5196 if star 5197 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5198 ) 5199 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5200 5201 unique_keys = None 5202 if self._match_text_seq("WITH", "UNIQUE"): 5203 unique_keys = True 5204 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5205 unique_keys = False 5206 5207 self._match_text_seq("KEYS") 5208 5209 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5210 self._parse_type() 5211 ) 5212 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5213 5214 return self.expression( 5215 exp.JSONObjectAgg if agg else exp.JSONObject, 5216 expressions=expressions, 5217 null_handling=null_handling, 5218 unique_keys=unique_keys, 5219 return_type=return_type, 5220 encoding=encoding, 5221 ) 5222 5223 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5224 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5225 if not self._match_text_seq("NESTED"): 5226 this = self._parse_id_var() 5227 kind = self._parse_types(allow_identifiers=False) 5228 nested = None 5229 else: 5230 this = None 5231 kind = None 5232 nested = True 5233 5234 path = self._match_text_seq("PATH") and self._parse_string() 5235 nested_schema = nested and self._parse_json_schema() 5236 5237 return self.expression( 5238 exp.JSONColumnDef, 5239 this=this, 5240 kind=kind, 5241 path=path, 5242 nested_schema=nested_schema, 5243 ) 5244 5245 def _parse_json_schema(self) -> exp.JSONSchema: 5246 self._match_text_seq("COLUMNS") 5247 return self.expression( 5248 exp.JSONSchema, 5249 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5250 ) 5251 5252 def _parse_json_table(self) -> exp.JSONTable: 5253 this = self._parse_format_json(self._parse_bitwise()) 5254 path = self._match(TokenType.COMMA) and self._parse_string() 5255 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5256 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5257 schema = self._parse_json_schema() 5258 5259 return exp.JSONTable( 5260 this=this, 5261 schema=schema, 5262 path=path, 5263 error_handling=error_handling, 5264 empty_handling=empty_handling, 5265 ) 5266 5267 def _parse_match_against(self) -> exp.MatchAgainst: 5268 expressions = self._parse_csv(self._parse_column) 5269 5270 self._match_text_seq(")", "AGAINST", "(") 5271 5272 this = self._parse_string() 5273 5274 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5275 modifier = "IN NATURAL LANGUAGE MODE" 5276 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5277 modifier = f"{modifier} WITH QUERY EXPANSION" 5278 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5279 modifier = "IN BOOLEAN MODE" 5280 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5281 modifier = "WITH QUERY EXPANSION" 5282 else: 5283 modifier = None 5284 5285 return self.expression( 5286 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5287 ) 5288 5289 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5290 def _parse_open_json(self) -> exp.OpenJSON: 5291 this = self._parse_bitwise() 5292 path = self._match(TokenType.COMMA) and self._parse_string() 5293 5294 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5295 this = self._parse_field(any_token=True) 5296 kind = self._parse_types() 5297 path = 
self._parse_string() 5298 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5299 5300 return self.expression( 5301 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5302 ) 5303 5304 expressions = None 5305 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5306 self._match_l_paren() 5307 expressions = self._parse_csv(_parse_open_json_column_def) 5308 5309 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5310 5311 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5312 args = self._parse_csv(self._parse_bitwise) 5313 5314 if self._match(TokenType.IN): 5315 return self.expression( 5316 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5317 ) 5318 5319 if haystack_first: 5320 haystack = seq_get(args, 0) 5321 needle = seq_get(args, 1) 5322 else: 5323 needle = seq_get(args, 0) 5324 haystack = seq_get(args, 1) 5325 5326 return self.expression( 5327 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5328 ) 5329 5330 def _parse_predict(self) -> exp.Predict: 5331 self._match_text_seq("MODEL") 5332 this = self._parse_table() 5333 5334 self._match(TokenType.COMMA) 5335 self._match_text_seq("TABLE") 5336 5337 return self.expression( 5338 exp.Predict, 5339 this=this, 5340 expression=self._parse_table(), 5341 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5342 ) 5343 5344 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5345 args = self._parse_csv(self._parse_table) 5346 return exp.JoinHint(this=func_name.upper(), expressions=args) 5347 5348 def _parse_substring(self) -> exp.Substring: 5349 # Postgres supports the form: substring(string [from int] [for int]) 5350 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5351 5352 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5353 5354 if self._match(TokenType.FROM): 5355 args.append(self._parse_bitwise()) 5356 if self._match(TokenType.FOR): 5357 if len(args) == 1: 5358 args.append(exp.Literal.number(1)) 5359 args.append(self._parse_bitwise()) 5360 5361 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5362 5363 def _parse_trim(self) -> exp.Trim: 5364 # https://www.w3resource.com/sql/character-functions/trim.php 5365 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5366 5367 position = None 5368 collation = None 5369 expression = None 5370 5371 if self._match_texts(self.TRIM_TYPES): 5372 position = self._prev.text.upper() 5373 5374 this = self._parse_bitwise() 5375 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5376 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5377 expression = self._parse_bitwise() 5378 5379 if invert_order: 5380 this, expression = expression, this 5381 5382 if self._match(TokenType.COLLATE): 5383 collation = self._parse_bitwise() 5384 5385 return self.expression( 5386 exp.Trim, this=this, position=position, expression=expression, collation=collation 5387 ) 5388 5389 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5390 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5391 5392 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5393 return self._parse_window(self._parse_id_var(), alias=True) 5394 5395 def _parse_respect_or_ignore_nulls( 5396 self, this: t.Optional[exp.Expression] 5397 ) -> t.Optional[exp.Expression]: 5398 if self._match_text_seq("IGNORE", "NULLS"): 
5399 return self.expression(exp.IgnoreNulls, this=this) 5400 if self._match_text_seq("RESPECT", "NULLS"): 5401 return self.expression(exp.RespectNulls, this=this) 5402 return this 5403 5404 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5405 if self._match(TokenType.HAVING): 5406 self._match_texts(("MAX", "MIN")) 5407 max = self._prev.text.upper() != "MIN" 5408 return self.expression( 5409 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5410 ) 5411 5412 return this 5413 5414 def _parse_window( 5415 self, this: t.Optional[exp.Expression], alias: bool = False 5416 ) -> t.Optional[exp.Expression]: 5417 func = this 5418 comments = func.comments if isinstance(func, exp.Expression) else None 5419 5420 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5421 self._match(TokenType.WHERE) 5422 this = self.expression( 5423 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5424 ) 5425 self._match_r_paren() 5426 5427 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5428 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5429 if self._match_text_seq("WITHIN", "GROUP"): 5430 order = self._parse_wrapped(self._parse_order) 5431 this = self.expression(exp.WithinGroup, this=this, expression=order) 5432 5433 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5434 # Some dialects choose to implement and some do not. 5435 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5436 5437 # There is some code above in _parse_lambda that handles 5438 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5439 5440 # The below changes handle 5441 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5442 5443 # Oracle allows both formats 5444 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5445 # and Snowflake chose to do the same for familiarity 5446 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5447 if isinstance(this, exp.AggFunc): 5448 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5449 5450 if ignore_respect and ignore_respect is not this: 5451 ignore_respect.replace(ignore_respect.this) 5452 this = self.expression(ignore_respect.__class__, this=this) 5453 5454 this = self._parse_respect_or_ignore_nulls(this) 5455 5456 # bigquery select from window x AS (partition by ...) 
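# Illustrative (added note): for BigQuery's
#   SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y)
# _parse_named_window re-enters this method with alias=True, so no OVER
# keyword is expected and `over` remains None below.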
5457 if alias: 5458 over = None 5459 self._match(TokenType.ALIAS) 5460 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5461 return this 5462 else: 5463 over = self._prev.text.upper() 5464 5465 if comments and isinstance(func, exp.Expression): 5466 func.pop_comments() 5467 5468 if not self._match(TokenType.L_PAREN): 5469 return self.expression( 5470 exp.Window, 5471 comments=comments, 5472 this=this, 5473 alias=self._parse_id_var(False), 5474 over=over, 5475 ) 5476 5477 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5478 5479 first = self._match(TokenType.FIRST) 5480 if self._match_text_seq("LAST"): 5481 first = False 5482 5483 partition, order = self._parse_partition_and_order() 5484 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5485 5486 if kind: 5487 self._match(TokenType.BETWEEN) 5488 start = self._parse_window_spec() 5489 self._match(TokenType.AND) 5490 end = self._parse_window_spec() 5491 5492 spec = self.expression( 5493 exp.WindowSpec, 5494 kind=kind, 5495 start=start["value"], 5496 start_side=start["side"], 5497 end=end["value"], 5498 end_side=end["side"], 5499 ) 5500 else: 5501 spec = None 5502 5503 self._match_r_paren() 5504 5505 window = self.expression( 5506 exp.Window, 5507 comments=comments, 5508 this=this, 5509 partition_by=partition, 5510 order=order, 5511 spec=spec, 5512 alias=window_alias, 5513 over=over, 5514 first=first, 5515 ) 5516 5517 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5518 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5519 return self._parse_window(window, alias=alias) 5520 5521 return window 5522 5523 def _parse_partition_and_order( 5524 self, 5525 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5526 return self._parse_partition_by(), self._parse_order() 5527 5528 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5529 self._match(TokenType.BETWEEN) 5530 5531 return { 5532 "value": ( 5533 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5534 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5535 or self._parse_bitwise() 5536 ), 5537 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5538 } 5539 5540 def _parse_alias( 5541 self, this: t.Optional[exp.Expression], explicit: bool = False 5542 ) -> t.Optional[exp.Expression]: 5543 any_token = self._match(TokenType.ALIAS) 5544 comments = self._prev_comments or [] 5545 5546 if explicit and not any_token: 5547 return this 5548 5549 if self._match(TokenType.L_PAREN): 5550 aliases = self.expression( 5551 exp.Aliases, 5552 comments=comments, 5553 this=this, 5554 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5555 ) 5556 self._match_r_paren(aliases) 5557 return aliases 5558 5559 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5560 self.STRING_ALIASES and self._parse_string_as_identifier() 5561 ) 5562 5563 if alias: 5564 comments.extend(alias.pop_comments()) 5565 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5566 column = this.this 5567 5568 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5569 if not this.comments and column and column.comments: 5570 this.comments = column.pop_comments() 5571 5572 return this 5573 5574 def _parse_id_var( 5575 self, 5576 any_token: bool = True, 5577 tokens: t.Optional[t.Collection[TokenType]] = None, 5578 ) -> t.Optional[exp.Expression]: 5579 expression = self._parse_identifier() 5580 if 
not expression and ( 5581 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5582 ): 5583 quoted = self._prev.token_type == TokenType.STRING 5584 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5585 5586 return expression 5587 5588 def _parse_string(self) -> t.Optional[exp.Expression]: 5589 if self._match_set(self.STRING_PARSERS): 5590 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5591 return self._parse_placeholder() 5592 5593 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5594 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5595 5596 def _parse_number(self) -> t.Optional[exp.Expression]: 5597 if self._match_set(self.NUMERIC_PARSERS): 5598 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5599 return self._parse_placeholder() 5600 5601 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5602 if self._match(TokenType.IDENTIFIER): 5603 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5604 return self._parse_placeholder() 5605 5606 def _parse_var( 5607 self, 5608 any_token: bool = False, 5609 tokens: t.Optional[t.Collection[TokenType]] = None, 5610 upper: bool = False, 5611 ) -> t.Optional[exp.Expression]: 5612 if ( 5613 (any_token and self._advance_any()) 5614 or self._match(TokenType.VAR) 5615 or (self._match_set(tokens) if tokens else False) 5616 ): 5617 return self.expression( 5618 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5619 ) 5620 return self._parse_placeholder() 5621 5622 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5623 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5624 self._advance() 5625 return self._prev 5626 return None 5627 5628 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5629 return self._parse_var() or self._parse_string() 5630 5631 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5632 return self._parse_primary() or self._parse_var(any_token=True) 5633 5634 def _parse_null(self) -> t.Optional[exp.Expression]: 5635 if self._match_set(self.NULL_TOKENS): 5636 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5637 return self._parse_placeholder() 5638 5639 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5640 if self._match(TokenType.TRUE): 5641 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5642 if self._match(TokenType.FALSE): 5643 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5644 return self._parse_placeholder() 5645 5646 def _parse_star(self) -> t.Optional[exp.Expression]: 5647 if self._match(TokenType.STAR): 5648 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5649 return self._parse_placeholder() 5650 5651 def _parse_parameter(self) -> exp.Parameter: 5652 this = self._parse_identifier() or self._parse_primary_or_var() 5653 return self.expression(exp.Parameter, this=this) 5654 5655 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5656 if self._match_set(self.PLACEHOLDER_PARSERS): 5657 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5658 if placeholder: 5659 return placeholder 5660 self._advance(-1) 5661 return None 5662 5663 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5664 if not self._match(TokenType.EXCEPT): 5665 return None 5666 if self._match(TokenType.L_PAREN, advance=False): 5667 return 
self._parse_wrapped_csv(self._parse_column) 5668 5669 except_column = self._parse_column() 5670 return [except_column] if except_column else None 5671 5672 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5673 if not self._match(TokenType.REPLACE): 5674 return None 5675 if self._match(TokenType.L_PAREN, advance=False): 5676 return self._parse_wrapped_csv(self._parse_expression) 5677 5678 replace_expression = self._parse_expression() 5679 return [replace_expression] if replace_expression else None 5680 5681 def _parse_csv( 5682 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5683 ) -> t.List[exp.Expression]: 5684 parse_result = parse_method() 5685 items = [parse_result] if parse_result is not None else [] 5686 5687 while self._match(sep): 5688 self._add_comments(parse_result) 5689 parse_result = parse_method() 5690 if parse_result is not None: 5691 items.append(parse_result) 5692 5693 return items 5694 5695 def _parse_tokens( 5696 self, parse_method: t.Callable, expressions: t.Dict 5697 ) -> t.Optional[exp.Expression]: 5698 this = parse_method() 5699 5700 while self._match_set(expressions): 5701 this = self.expression( 5702 expressions[self._prev.token_type], 5703 this=this, 5704 comments=self._prev_comments, 5705 expression=parse_method(), 5706 ) 5707 5708 return this 5709 5710 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5711 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5712 5713 def _parse_wrapped_csv( 5714 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5715 ) -> t.List[exp.Expression]: 5716 return self._parse_wrapped( 5717 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5718 ) 5719 5720 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5721 wrapped = self._match(TokenType.L_PAREN) 5722 if not wrapped and not optional: 5723 self.raise_error("Expecting (") 5724 parse_result = parse_method() 5725 if wrapped: 5726 self._match_r_paren() 5727 return parse_result 5728 5729 def _parse_expressions(self) -> t.List[exp.Expression]: 5730 return self._parse_csv(self._parse_expression) 5731 5732 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5733 return self._parse_select() or self._parse_set_operations( 5734 self._parse_expression() if alias else self._parse_conjunction() 5735 ) 5736 5737 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5738 return self._parse_query_modifiers( 5739 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5740 ) 5741 5742 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5743 this = None 5744 if self._match_texts(self.TRANSACTION_KIND): 5745 this = self._prev.text 5746 5747 self._match_texts(("TRANSACTION", "WORK")) 5748 5749 modes = [] 5750 while True: 5751 mode = [] 5752 while self._match(TokenType.VAR): 5753 mode.append(self._prev.text) 5754 5755 if mode: 5756 modes.append(" ".join(mode)) 5757 if not self._match(TokenType.COMMA): 5758 break 5759 5760 return self.expression(exp.Transaction, this=this, modes=modes) 5761 5762 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5763 chain = None 5764 savepoint = None 5765 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5766 5767 self._match_texts(("TRANSACTION", "WORK")) 5768 5769 if self._match_text_seq("TO"): 5770 self._match_text_seq("SAVEPOINT") 5771 savepoint = self._parse_id_var() 5772 5773 if 
self._match(TokenType.AND): 5774 chain = not self._match_text_seq("NO") 5775 self._match_text_seq("CHAIN") 5776 5777 if is_rollback: 5778 return self.expression(exp.Rollback, savepoint=savepoint) 5779 5780 return self.expression(exp.Commit, chain=chain) 5781 5782 def _parse_refresh(self) -> exp.Refresh: 5783 self._match(TokenType.TABLE) 5784 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5785 5786 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5787 if not self._match_text_seq("ADD"): 5788 return None 5789 5790 self._match(TokenType.COLUMN) 5791 exists_column = self._parse_exists(not_=True) 5792 expression = self._parse_field_def() 5793 5794 if expression: 5795 expression.set("exists", exists_column) 5796 5797 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5798 if self._match_texts(("FIRST", "AFTER")): 5799 position = self._prev.text 5800 column_position = self.expression( 5801 exp.ColumnPosition, this=self._parse_column(), position=position 5802 ) 5803 expression.set("position", column_position) 5804 5805 return expression 5806 5807 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5808 drop = self._match(TokenType.DROP) and self._parse_drop() 5809 if drop and not isinstance(drop, exp.Command): 5810 drop.set("kind", drop.args.get("kind", "COLUMN")) 5811 return drop 5812 5813 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5814 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5815 return self.expression( 5816 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5817 ) 5818 5819 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5820 index = self._index - 1 5821 5822 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5823 return self._parse_csv( 5824 lambda: self.expression( 5825 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5826 ) 5827 ) 5828 5829 self._retreat(index) 5830 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5831 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5832 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5833 5834 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 5835 if self._match_texts(self.ALTER_ALTER_PARSERS): 5836 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 5837 5838 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 5839 # keyword after ALTER we default to parsing this statement 5840 self._match(TokenType.COLUMN) 5841 column = self._parse_field(any_token=True) 5842 5843 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5844 return self.expression(exp.AlterColumn, this=column, drop=True) 5845 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5846 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5847 if self._match(TokenType.COMMENT): 5848 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5849 5850 self._match_text_seq("SET", "DATA") 5851 self._match_text_seq("TYPE") 5852 return self.expression( 5853 exp.AlterColumn, 5854 this=column, 5855 dtype=self._parse_types(), 5856 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5857 using=self._match(TokenType.USING) and self._parse_conjunction(), 5858 ) 5859 5860 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 5861 if 
self._match_texts(("ALL", "EVEN", "AUTO")): 5862 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 5863 5864 self._match_text_seq("KEY", "DISTKEY") 5865 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 5866 5867 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 5868 if compound: 5869 self._match_text_seq("SORTKEY") 5870 5871 if self._match(TokenType.L_PAREN, advance=False): 5872 return self.expression( 5873 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 5874 ) 5875 5876 self._match_texts(("AUTO", "NONE")) 5877 return self.expression( 5878 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 5879 ) 5880 5881 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5882 index = self._index - 1 5883 5884 partition_exists = self._parse_exists() 5885 if self._match(TokenType.PARTITION, advance=False): 5886 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5887 5888 self._retreat(index) 5889 return self._parse_csv(self._parse_drop_column) 5890 5891 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5892 if self._match(TokenType.COLUMN): 5893 exists = self._parse_exists() 5894 old_column = self._parse_column() 5895 to = self._match_text_seq("TO") 5896 new_column = self._parse_column() 5897 5898 if old_column is None or to is None or new_column is None: 5899 return None 5900 5901 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5902 5903 self._match_text_seq("TO") 5904 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5905 5906 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5907 start = self._prev 5908 5909 if not self._match(TokenType.TABLE): 5910 return self._parse_as_command(start) 5911 5912 exists = self._parse_exists() 5913 only = self._match_text_seq("ONLY") 5914 this = self._parse_table(schema=True) 5915 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 5916 5917 if self._next: 5918 self._advance() 5919 5920 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5921 if parser: 5922 actions = ensure_list(parser(self)) 5923 options = self._parse_csv(self._parse_property) 5924 5925 if not self._curr and actions: 5926 return self.expression( 5927 exp.AlterTable, 5928 this=this, 5929 exists=exists, 5930 actions=actions, 5931 only=only, 5932 options=options, 5933 cluster=cluster, 5934 ) 5935 5936 return self._parse_as_command(start) 5937 5938 def _parse_merge(self) -> exp.Merge: 5939 self._match(TokenType.INTO) 5940 target = self._parse_table() 5941 5942 if target and self._match(TokenType.ALIAS, advance=False): 5943 target.set("alias", self._parse_table_alias()) 5944 5945 self._match(TokenType.USING) 5946 using = self._parse_table() 5947 5948 self._match(TokenType.ON) 5949 on = self._parse_conjunction() 5950 5951 return self.expression( 5952 exp.Merge, 5953 this=target, 5954 using=using, 5955 on=on, 5956 expressions=self._parse_when_matched(), 5957 ) 5958 5959 def _parse_when_matched(self) -> t.List[exp.When]: 5960 whens = [] 5961 5962 while self._match(TokenType.WHEN): 5963 matched = not self._match(TokenType.NOT) 5964 self._match_text_seq("MATCHED") 5965 source = ( 5966 False 5967 if self._match_text_seq("BY", "TARGET") 5968 else self._match_text_seq("BY", "SOURCE") 5969 ) 5970 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 
5971 5972 self._match(TokenType.THEN) 5973 5974 if self._match(TokenType.INSERT): 5975 _this = self._parse_star() 5976 if _this: 5977 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5978 else: 5979 then = self.expression( 5980 exp.Insert, 5981 this=self._parse_value(), 5982 expression=self._match_text_seq("VALUES") and self._parse_value(), 5983 ) 5984 elif self._match(TokenType.UPDATE): 5985 expressions = self._parse_star() 5986 if expressions: 5987 then = self.expression(exp.Update, expressions=expressions) 5988 else: 5989 then = self.expression( 5990 exp.Update, 5991 expressions=self._match(TokenType.SET) 5992 and self._parse_csv(self._parse_equality), 5993 ) 5994 elif self._match(TokenType.DELETE): 5995 then = self.expression(exp.Var, this=self._prev.text) 5996 else: 5997 then = None 5998 5999 whens.append( 6000 self.expression( 6001 exp.When, 6002 matched=matched, 6003 source=source, 6004 condition=condition, 6005 then=then, 6006 ) 6007 ) 6008 return whens 6009 6010 def _parse_show(self) -> t.Optional[exp.Expression]: 6011 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6012 if parser: 6013 return parser(self) 6014 return self._parse_as_command(self._prev) 6015 6016 def _parse_set_item_assignment( 6017 self, kind: t.Optional[str] = None 6018 ) -> t.Optional[exp.Expression]: 6019 index = self._index 6020 6021 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6022 return self._parse_set_transaction(global_=kind == "GLOBAL") 6023 6024 left = self._parse_primary() or self._parse_column() 6025 assignment_delimiter = self._match_texts(("=", "TO")) 6026 6027 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6028 self._retreat(index) 6029 return None 6030 6031 right = self._parse_statement() or self._parse_id_var() 6032 this = self.expression(exp.EQ, this=left, expression=right) 6033 6034 return self.expression(exp.SetItem, this=this, kind=kind) 6035 6036 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6037 self._match_text_seq("TRANSACTION") 6038 characteristics = self._parse_csv( 6039 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6040 ) 6041 return self.expression( 6042 exp.SetItem, 6043 expressions=characteristics, 6044 kind="TRANSACTION", 6045 **{"global": global_}, # type: ignore 6046 ) 6047 6048 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6049 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6050 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6051 6052 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6053 index = self._index 6054 set_ = self.expression( 6055 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6056 ) 6057 6058 if self._curr: 6059 self._retreat(index) 6060 return self._parse_as_command(self._prev) 6061 6062 return set_ 6063 6064 def _parse_var_from_options( 6065 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6066 ) -> t.Optional[exp.Var]: 6067 start = self._curr 6068 if not start: 6069 return None 6070 6071 option = start.text.upper() 6072 continuations = options.get(option) 6073 6074 index = self._index 6075 self._advance() 6076 for keywords in continuations or []: 6077 if isinstance(keywords, str): 6078 keywords = (keywords,) 6079 6080 if self._match_text_seq(*keywords): 6081 option = f"{option} {' '.join(keywords)}" 6082 break 6083 else: 6084 if continuations or continuations is None: 6085 
if raise_unmatched: 6086 self.raise_error(f"Unknown option {option}") 6087 6088 self._retreat(index) 6089 return None 6090 6091 return exp.var(option) 6092 6093 def _parse_as_command(self, start: Token) -> exp.Command: 6094 while self._curr: 6095 self._advance() 6096 text = self._find_sql(start, self._prev) 6097 size = len(start.text) 6098 self._warn_unsupported() 6099 return exp.Command(this=text[:size], expression=text[size:]) 6100 6101 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6102 settings = [] 6103 6104 self._match_l_paren() 6105 kind = self._parse_id_var() 6106 6107 if self._match(TokenType.L_PAREN): 6108 while True: 6109 key = self._parse_id_var() 6110 value = self._parse_primary() 6111 6112 if not key and value is None: 6113 break 6114 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6115 self._match(TokenType.R_PAREN) 6116 6117 self._match_r_paren() 6118 6119 return self.expression( 6120 exp.DictProperty, 6121 this=this, 6122 kind=kind.this if kind else None, 6123 settings=settings, 6124 ) 6125 6126 def _parse_dict_range(self, this: str) -> exp.DictRange: 6127 self._match_l_paren() 6128 has_min = self._match_text_seq("MIN") 6129 if has_min: 6130 min = self._parse_var() or self._parse_primary() 6131 self._match_text_seq("MAX") 6132 max = self._parse_var() or self._parse_primary() 6133 else: 6134 max = self._parse_var() or self._parse_primary() 6135 min = exp.Literal.number(0) 6136 self._match_r_paren() 6137 return self.expression(exp.DictRange, this=this, min=min, max=max) 6138 6139 def _parse_comprehension( 6140 self, this: t.Optional[exp.Expression] 6141 ) -> t.Optional[exp.Comprehension]: 6142 index = self._index 6143 expression = self._parse_column() 6144 if not self._match(TokenType.IN): 6145 self._retreat(index - 1) 6146 return None 6147 iterator = self._parse_column() 6148 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6149 return self.expression( 6150 exp.Comprehension, 6151 this=this, 6152 expression=expression, 6153 iterator=iterator, 6154 condition=condition, 6155 ) 6156 6157 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6158 if self._match(TokenType.HEREDOC_STRING): 6159 return self.expression(exp.Heredoc, this=self._prev.text) 6160 6161 if not self._match_text_seq("$"): 6162 return None 6163 6164 tags = ["$"] 6165 tag_text = None 6166 6167 if self._is_connected(): 6168 self._advance() 6169 tags.append(self._prev.text.upper()) 6170 else: 6171 self.raise_error("No closing $ found") 6172 6173 if tags[-1] != "$": 6174 if self._is_connected() and self._match_text_seq("$"): 6175 tag_text = tags[-1] 6176 tags.append("$") 6177 else: 6178 self.raise_error("No closing $ found") 6179 6180 heredoc_start = self._curr 6181 6182 while self._curr: 6183 if self._match_text_seq(*tags, advance=False): 6184 this = self._find_sql(heredoc_start, self._prev) 6185 self._advance(len(tags)) 6186 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6187 6188 self._advance() 6189 6190 self.raise_error(f"No closing {''.join(tags)} found") 6191 return None 6192 6193 def _find_parser( 6194 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6195 ) -> t.Optional[t.Callable]: 6196 if not self._curr: 6197 return None 6198 6199 index = self._index 6200 this = [] 6201 while True: 6202 # The current token might be multiple words 6203 curr = self._curr.text.upper() 6204 key = curr.split(" ") 6205 this.append(curr) 6206 6207 self._advance() 6208 result, trie = in_trie(trie, key) 6209 if result == 
TrieResult.FAILED: 6210 break 6211 6212 if result == TrieResult.EXISTS: 6213 subparser = parsers[" ".join(this)] 6214 return subparser 6215 6216 self._retreat(index) 6217 return None 6218 6219 def _match(self, token_type, advance=True, expression=None): 6220 if not self._curr: 6221 return None 6222 6223 if self._curr.token_type == token_type: 6224 if advance: 6225 self._advance() 6226 self._add_comments(expression) 6227 return True 6228 6229 return None 6230 6231 def _match_set(self, types, advance=True): 6232 if not self._curr: 6233 return None 6234 6235 if self._curr.token_type in types: 6236 if advance: 6237 self._advance() 6238 return True 6239 6240 return None 6241 6242 def _match_pair(self, token_type_a, token_type_b, advance=True): 6243 if not self._curr or not self._next: 6244 return None 6245 6246 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6247 if advance: 6248 self._advance(2) 6249 return True 6250 6251 return None 6252 6253 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6254 if not self._match(TokenType.L_PAREN, expression=expression): 6255 self.raise_error("Expecting (") 6256 6257 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6258 if not self._match(TokenType.R_PAREN, expression=expression): 6259 self.raise_error("Expecting )") 6260 6261 def _match_texts(self, texts, advance=True): 6262 if self._curr and self._curr.text.upper() in texts: 6263 if advance: 6264 self._advance() 6265 return True 6266 return None 6267 6268 def _match_text_seq(self, *texts, advance=True): 6269 index = self._index 6270 for text in texts: 6271 if self._curr and self._curr.text.upper() == text: 6272 self._advance() 6273 else: 6274 self._retreat(index) 6275 return None 6276 6277 if not advance: 6278 self._retreat(index) 6279 6280 return True 6281 6282 def _replace_lambda( 6283 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6284 ) -> t.Optional[exp.Expression]: 6285 if not node: 6286 return node 6287 6288 for column in node.find_all(exp.Column): 6289 if column.parts[0].name in lambda_variables: 6290 dot_or_id = column.to_dot() if column.table else column.this 6291 parent = column.parent 6292 6293 while isinstance(parent, exp.Dot): 6294 if not isinstance(parent.parent, exp.Dot): 6295 parent.replace(dot_or_id) 6296 break 6297 parent = parent.parent 6298 else: 6299 if column is node: 6300 node = dot_or_id 6301 else: 6302 column.replace(dot_or_id) 6303 return node 6304 6305 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6306 start = self._prev 6307 6308 # Not to be confused with TRUNCATE(number, decimals) function call 6309 if self._match(TokenType.L_PAREN): 6310 self._retreat(self._index - 2) 6311 return self._parse_function() 6312 6313 # Clickhouse supports TRUNCATE DATABASE as well 6314 is_database = self._match(TokenType.DATABASE) 6315 6316 self._match(TokenType.TABLE) 6317 6318 exists = self._parse_exists(not_=False) 6319 6320 expressions = self._parse_csv( 6321 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6322 ) 6323 6324 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6325 6326 if self._match_text_seq("RESTART", "IDENTITY"): 6327 identity = "RESTART" 6328 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6329 identity = "CONTINUE" 6330 else: 6331 identity = None 6332 6333 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6334 option = self._prev.text 6335 else: 6336 
option = None 6337 6338 partition = self._parse_partition() 6339 6340 # Fallback case 6341 if self._curr: 6342 return self._parse_as_command(start) 6343 6344 return self.expression( 6345 exp.TruncateTable, 6346 expressions=expressions, 6347 is_database=is_database, 6348 exists=exists, 6349 cluster=cluster, 6350 identity=identity, 6351 option=option, 6352 partition=partition, 6353 ) 6354 6355 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6356 this = self._parse_ordered(self._parse_opclass) 6357 6358 if not self._match(TokenType.WITH): 6359 return this 6360 6361 op = self._parse_var(any_token=True) 6362 6363 return self.expression(exp.WithOperator, this=this, op=op) 6364 6365 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6366 opts = [] 6367 self._match(TokenType.EQ) 6368 self._match(TokenType.L_PAREN) 6369 while self._curr and not self._match(TokenType.R_PAREN): 6370 opts.append(self._parse_conjunction()) 6371 self._match(TokenType.COMMA) 6372 return opts 6373 6374 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6375 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 6376 6377 options = [] 6378 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6379 option = self._parse_unquoted_field() 6380 value = None 6381 6382 # Some options are defined as functions with the values as params 6383 if not isinstance(option, exp.Func): 6384 prev = self._prev.text.upper() 6385 # Different dialects might separate options and values by white space, "=" and "AS" 6386 self._match(TokenType.EQ) 6387 self._match(TokenType.ALIAS) 6388 6389 if prev == "FILE_FORMAT" and self._match(TokenType.L_PAREN): 6390 # Snowflake FILE_FORMAT case 6391 value = self._parse_wrapped_options() 6392 else: 6393 value = self._parse_unquoted_field() 6394 6395 param = self.expression(exp.CopyParameter, this=option, expression=value) 6396 options.append(param) 6397 6398 if sep: 6399 self._match(sep) 6400 6401 return options 6402 6403 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6404 expr = self.expression(exp.Credentials) 6405 6406 if self._match_text_seq("STORAGE_INTEGRATION", advance=False): 6407 expr.set("storage", self._parse_conjunction()) 6408 if self._match_text_seq("CREDENTIALS"): 6409 # Snowflake supports CREDENTIALS = (...), while Redshift CREDENTIALS <string> 6410 creds = ( 6411 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6412 ) 6413 expr.set("credentials", creds) 6414 if self._match_text_seq("ENCRYPTION"): 6415 expr.set("encryption", self._parse_wrapped_options()) 6416 if self._match_text_seq("IAM_ROLE"): 6417 expr.set("iam_role", self._parse_field()) 6418 if self._match_text_seq("REGION"): 6419 expr.set("region", self._parse_field()) 6420 6421 return expr 6422 6423 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6424 return self._parse_field() 6425 6426 def _parse_copy(self) -> exp.Copy | exp.Command: 6427 start = self._prev 6428 6429 self._match(TokenType.INTO) 6430 6431 this = ( 6432 self._parse_conjunction() 6433 if self._match(TokenType.L_PAREN, advance=False) 6434 else self._parse_table(schema=True) 6435 ) 6436 6437 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6438 6439 files = self._parse_csv(self._parse_file_location) 6440 credentials = self._parse_credentials() 6441 6442 self._match_text_seq("WITH") 6443 6444 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6445 6446 # Fallback case 6447 if self._curr: 6448 
return self._parse_as_command(start) 6449 6450 return self.expression( 6451 exp.Copy, 6452 this=this, 6453 kind=kind, 6454 credentials=credentials, 6455 files=files, 6456 params=params, 6457 )
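The short sketches below exercise a few of the parse methods above through sqlglot's public parse_one/transpile entry points. They are illustrative usage examples, not part of the module, and expected outputs are approximate. First, the STRING_AGG handling in _parse_string_agg: a Postgres STRING_AGG with a trailing ORDER BY is parsed into exp.GroupConcat, which the MySQL generator renders as GROUP_CONCAT.

    import sqlglot

    # Postgres STRING_AGG(expr, sep ORDER BY ...) -> exp.GroupConcat -> MySQL GROUP_CONCAT
    sql = "SELECT STRING_AGG(x, ',' ORDER BY y) FROM t"
    print(sqlglot.transpile(sql, read="postgres", write="mysql")[0])
    # Approximately: SELECT GROUP_CONCAT(x ORDER BY y SEPARATOR ',') FROM t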
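The DECODE rewrite from _parse_decode, assuming this version routes Oracle's DECODE through it via FUNCTION_PARSERS as usual:

    import sqlglot

    expr = sqlglot.parse_one(
        "SELECT DECODE(x, 1, 'one', NULL, 'none', 'other') FROM t", read="oracle"
    )
    print(expr.sql(dialect="duckdb"))
    # Approximately:
    # SELECT CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'none' ELSE 'other' END FROM t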
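Inspecting the exp.Window and exp.WindowSpec nodes built by _parse_window and _parse_window_spec; the kind/start/side values come straight from the dict that _parse_window_spec returns:

    import sqlglot
    from sqlglot import exp

    ast = sqlglot.parse_one(
        "SELECT SUM(x) OVER (PARTITION BY y ORDER BY z "
        "ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM t"
    )
    window = ast.find(exp.Window)
    spec = window.args["spec"]
    print(spec.args["kind"], spec.args["start"], spec.args["start_side"])
    # Approximately: ROWS UNBOUNDED PRECEDING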
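The operand swap in _parse_trim for the TRIM(<chars> FROM <expr>) form, and the IN form handled by _parse_position:

    import sqlglot
    from sqlglot import exp

    # TRIM(LEADING 'x' FROM y): FROM inverts the operand order, so `this` is the
    # trimmed column and `expression` is the character set.
    trim = sqlglot.parse_one("SELECT TRIM(LEADING 'x' FROM y)").find(exp.Trim)
    print(trim.this, trim.args.get("position"), trim.expression)
    # Approximately: y LEADING 'x'

    # POSITION('a' IN x): the IN branch puts the haystack in `this`.
    pos = sqlglot.parse_one("SELECT POSITION('a' IN x)").find(exp.StrPosition)
    print(pos.this, pos.args.get("substr"))
    # Approximately: x 'a'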
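The matched/source flags collected per WHEN clause by _parse_when_matched:

    import sqlglot

    merge = sqlglot.parse_one(
        "MERGE INTO t USING s ON t.id = s.id "
        "WHEN MATCHED THEN UPDATE SET t.v = s.v "
        "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
    )
    for when in merge.expressions:
        print(when.args.get("matched"), type(when.args.get("then")).__name__)
    # Approximately: True Update, then False Insert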
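The identity and option handling in _parse_truncate_table:

    import sqlglot

    tt = sqlglot.parse_one("TRUNCATE TABLE t1, t2 RESTART IDENTITY CASCADE", read="postgres")
    print(type(tt).__name__, tt.args.get("identity"), tt.args.get("option"))
    # Approximately: TruncateTable RESTART CASCADE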
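_parse_transaction and _parse_commit_or_rollback in action; COMMIT AND NO CHAIN sets chain=False per the code above:

    import sqlglot

    print(repr(sqlglot.parse_one("BEGIN")))                # Approximately: Transaction()
    print(repr(sqlglot.parse_one("COMMIT AND NO CHAIN")))  # Approximately: Commit(chain=False)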
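Assuming a sqlglot version where COPY statements are parsed into exp.Copy (as _parse_copy above does), the FROM/TO direction lands in the `kind` flag:

    import sqlglot

    copy = sqlglot.parse_one("COPY t FROM 'data.csv'", read="postgres")
    print(type(copy).__name__, copy.args.get("kind"))
    # Approximately: Copy True  (FROM -> kind=True, TO -> kind=False)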
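The MySQL full-text form handled by _parse_match_against, assuming MATCH is wired through FUNCTION_PARSERS as usual:

    import sqlglot
    from sqlglot import exp

    ma = sqlglot.parse_one(
        "SELECT * FROM docs WHERE MATCH(title, body) AGAINST('hello' IN BOOLEAN MODE)",
        read="mysql",
    ).find(exp.MatchAgainst)
    print(ma.args.get("modifier"))
    # Approximately: IN BOOLEAN MODE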

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }
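
    # Example (illustrative): STATEMENT_PARSERS is the top-level dispatch table,
    # keyed by the first meaningful token of a statement:
    #
    #     >>> import sqlglot
    #     >>> type(sqlglot.parse_one("DROP TABLE t")).__name__
    #     'Drop'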

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
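
    # Example (illustrative): RANGE_PARSERS extends an already-parsed operand
    # when a token such as BETWEEN, IN or LIKE follows it:
    #
    #     >>> import sqlglot
    #     >>> type(sqlglot.parse_one("x BETWEEN 1 AND 10")).__name__
    #     'Between'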

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
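
    # Example (illustrative): PROPERTY_PARSERS is keyed by the keyword that
    # introduces a DDL property, e.g. MySQL's ENGINE clause maps to
    # exp.EngineProperty:
    #
    #     >>> import sqlglot
    #     >>> ast = sqlglot.parse_one("CREATE TABLE t (a INT) ENGINE=InnoDB", read="mysql")
    #     >>> ast.find(sqlglot.exp.EngineProperty) is not None
    #     True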

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_conjunction),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }
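
    # Example (illustrative): CONSTRAINT_PARSERS drives column and table
    # constraint parsing inside a schema definition:
    #
    #     >>> import sqlglot
    #     >>> ast = sqlglot.parse_one("CREATE TABLE t (a INT PRIMARY KEY)")
    #     >>> ast.find(sqlglot.exp.PrimaryKeyColumnConstraint) is not None
    #     True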

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }
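
    # Example (illustrative): FUNCTION_PARSERS covers functions whose argument
    # lists need special grammar, e.g. CAST(x AS INT) or EXTRACT(YEAR FROM d),
    # which cannot be parsed as plain comma-separated arguments:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT EXTRACT(YEAR FROM d)").find(sqlglot.exp.Extract).sql()
    #     'EXTRACT(YEAR FROM d)'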

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}
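
    # Example (illustrative): QUERY_MODIFIER_PARSERS attaches trailing clauses to
    # a parsed query, storing each parsed clause under the arg name given in the
    # (key, value) pair:
    #
    #     >>> import sqlglot
    #     >>> ast = sqlglot.parse_one("SELECT a FROM t WHERE a > 1 LIMIT 5")
    #     >>> sorted(k for k in ("where", "limit") if ast.args.get(k))
    #     ['limit', 'where']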

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. SELECT COUNT(*) 'count'
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
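
    # Example (illustrative): `parse` consumes tokens produced by the dialect's
    # tokenizer; the higher-level sqlglot.parse helper performs both steps:
    #
    #     >>> import sqlglot
    #     >>> [e.sql() for e in sqlglot.parse("SELECT 1; SELECT 2")]
    #     ['SELECT 1', 'SELECT 2']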

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an
        error. This behavior can be different depending on the user-set ErrorLevel, so _try_parse
        aims to solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
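
    # Example (illustrative sketch; exact behavior may vary by sqlglot version):
    # the MergeTree TTL grammar handled by _parse_ttl above is ClickHouse syntax,
    # reached via the TTL property on CREATE TABLE:
    #
    #     >>> import sqlglot
    #     >>> ast = sqlglot.parse_one(
    #     ...     "CREATE TABLE t (d DateTime) ENGINE=MergeTree ORDER BY d TTL d + INTERVAL 1 DAY",
    #     ...     read="clickhouse",
    #     ... )
    #     >>> ast.find(sqlglot.exp.MergeTreeTTL) is not None
    #     True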

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind.upper(),
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )
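
    # Example (illustrative): _parse_create handles the CREATE/REPLACE family;
    # optional pieces such as IF NOT EXISTS end up as args on the exp.Create node:
    #
    #     >>> import sqlglot
    #     >>> ast = sqlglot.parse_one("CREATE TABLE IF NOT EXISTS t (a INT)")
    #     >>> ast.args["kind"], ast.args["exists"]
    #     ('TABLE', True)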

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE, which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # Only used for Teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_bitwise() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self):
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )
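
    # Example (illustrative sketch; exact behavior may vary by sqlglot version):
    # the partition-bound grammar above follows Postgres declarative partitioning:
    #
    #     >>> import sqlglot
    #     >>> ast = sqlglot.parse_one(
    #     ...     "CREATE TABLE c PARTITION OF p FOR VALUES FROM (1) TO (10)",
    #     ...     read="postgres",
    #     ... )
    #     >>> ast.find(sqlglot.exp.PartitionedOfProperty) is not None
    #     True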

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
value = self._parse_schema(exp.var("TABLE")) 2244 else: 2245 value = self._parse_types() 2246 2247 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 2248 2249 def _parse_describe(self) -> exp.Describe: 2250 kind = self._match_set(self.CREATABLES) and self._prev.text 2251 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2252 if self._match(TokenType.DOT): 2253 style = None 2254 self._retreat(self._index - 2) 2255 this = self._parse_table(schema=True) 2256 properties = self._parse_properties() 2257 expressions = properties.expressions if properties else None 2258 return self.expression( 2259 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2260 ) 2261 2262 def _parse_insert(self) -> exp.Insert: 2263 comments = ensure_list(self._prev_comments) 2264 hint = self._parse_hint() 2265 overwrite = self._match(TokenType.OVERWRITE) 2266 ignore = self._match(TokenType.IGNORE) 2267 local = self._match_text_seq("LOCAL") 2268 alternative = None 2269 is_function = None 2270 2271 if self._match_text_seq("DIRECTORY"): 2272 this: t.Optional[exp.Expression] = self.expression( 2273 exp.Directory, 2274 this=self._parse_var_or_string(), 2275 local=local, 2276 row_format=self._parse_row_format(match_row=True), 2277 ) 2278 else: 2279 if self._match(TokenType.OR): 2280 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2281 2282 self._match(TokenType.INTO) 2283 comments += ensure_list(self._prev_comments) 2284 self._match(TokenType.TABLE) 2285 is_function = self._match(TokenType.FUNCTION) 2286 2287 this = ( 2288 self._parse_table(schema=True, parse_partition=True) 2289 if not is_function 2290 else self._parse_function() 2291 ) 2292 2293 returning = self._parse_returning() 2294 2295 return self.expression( 2296 exp.Insert, 2297 comments=comments, 2298 hint=hint, 2299 is_function=is_function, 2300 this=this, 2301 stored=self._match_text_seq("STORED") and self._parse_stored(), 2302 by_name=self._match_text_seq("BY", "NAME"), 2303 exists=self._parse_exists(), 2304 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2305 and self._parse_conjunction(), 2306 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2307 conflict=self._parse_on_conflict(), 2308 returning=returning or self._parse_returning(), 2309 overwrite=overwrite, 2310 alternative=alternative, 2311 ignore=ignore, 2312 ) 2313 2314 def _parse_kill(self) -> exp.Kill: 2315 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2316 2317 return self.expression( 2318 exp.Kill, 2319 this=self._parse_primary(), 2320 kind=kind, 2321 ) 2322 2323 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2324 conflict = self._match_text_seq("ON", "CONFLICT") 2325 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2326 2327 if not conflict and not duplicate: 2328 return None 2329 2330 conflict_keys = None 2331 constraint = None 2332 2333 if conflict: 2334 if self._match_text_seq("ON", "CONSTRAINT"): 2335 constraint = self._parse_id_var() 2336 elif self._match(TokenType.L_PAREN): 2337 conflict_keys = self._parse_csv(self._parse_id_var) 2338 self._match_r_paren() 2339 2340 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2341 if self._prev.token_type == TokenType.UPDATE: 2342 self._match(TokenType.SET) 2343 expressions = self._parse_csv(self._parse_equality) 2344 else: 2345 expressions = None 2346 2347 return self.expression( 2348 exp.OnConflict, 2349 duplicate=duplicate, 
2350 expressions=expressions, 2351 action=action, 2352 conflict_keys=conflict_keys, 2353 constraint=constraint, 2354 ) 2355 2356 def _parse_returning(self) -> t.Optional[exp.Returning]: 2357 if not self._match(TokenType.RETURNING): 2358 return None 2359 return self.expression( 2360 exp.Returning, 2361 expressions=self._parse_csv(self._parse_expression), 2362 into=self._match(TokenType.INTO) and self._parse_table_part(), 2363 ) 2364 2365 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2366 if not self._match(TokenType.FORMAT): 2367 return None 2368 return self._parse_row_format() 2369 2370 def _parse_row_format( 2371 self, match_row: bool = False 2372 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2373 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2374 return None 2375 2376 if self._match_text_seq("SERDE"): 2377 this = self._parse_string() 2378 2379 serde_properties = None 2380 if self._match(TokenType.SERDE_PROPERTIES): 2381 serde_properties = self.expression( 2382 exp.SerdeProperties, expressions=self._parse_wrapped_properties() 2383 ) 2384 2385 return self.expression( 2386 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2387 ) 2388 2389 self._match_text_seq("DELIMITED") 2390 2391 kwargs = {} 2392 2393 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2394 kwargs["fields"] = self._parse_string() 2395 if self._match_text_seq("ESCAPED", "BY"): 2396 kwargs["escaped"] = self._parse_string() 2397 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2398 kwargs["collection_items"] = self._parse_string() 2399 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2400 kwargs["map_keys"] = self._parse_string() 2401 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2402 kwargs["lines"] = self._parse_string() 2403 if self._match_text_seq("NULL", "DEFINED", "AS"): 2404 kwargs["null"] = self._parse_string() 2405 2406 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2407 2408 def _parse_load(self) -> exp.LoadData | exp.Command: 2409 if self._match_text_seq("DATA"): 2410 local = self._match_text_seq("LOCAL") 2411 self._match_text_seq("INPATH") 2412 inpath = self._parse_string() 2413 overwrite = self._match(TokenType.OVERWRITE) 2414 self._match_pair(TokenType.INTO, TokenType.TABLE) 2415 2416 return self.expression( 2417 exp.LoadData, 2418 this=self._parse_table(schema=True), 2419 local=local, 2420 overwrite=overwrite, 2421 inpath=inpath, 2422 partition=self._parse_partition(), 2423 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2424 serde=self._match_text_seq("SERDE") and self._parse_string(), 2425 ) 2426 return self._parse_as_command(self._prev) 2427 2428 def _parse_delete(self) -> exp.Delete: 2429 # This handles MySQL's "Multiple-Table Syntax" 2430 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2431 tables = None 2432 comments = self._prev_comments 2433 if not self._match(TokenType.FROM, advance=False): 2434 tables = self._parse_csv(self._parse_table) or None 2435 2436 returning = self._parse_returning() 2437 2438 return self.expression( 2439 exp.Delete, 2440 comments=comments, 2441 tables=tables, 2442 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2443 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2444 where=self._parse_where(), 2445 returning=returning or self._parse_returning(), 2446 limit=self._parse_limit(), 2447 ) 2448
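# A minimal sketch of reaching the multiple-table DELETE branch above via the
# public API (the SQL string is an illustrative assumption, not a test case):
#
#   >>> import sqlglot
#   >>> ast = sqlglot.parse_one(
#   ...     "DELETE a, b FROM a INNER JOIN b ON a.id = b.id WHERE a.x > 1",
#   ...     read="mysql",
#   ... )
#   >>> [t.name for t in ast.args["tables"]]  # the leading table list
#   ['a', 'b']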
2449 def _parse_update(self) -> exp.Update: 2450 comments = self._prev_comments 2451 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2452 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2453 returning = self._parse_returning() 2454 return self.expression( 2455 exp.Update, 2456 comments=comments, 2457 **{ # type: ignore 2458 "this": this, 2459 "expressions": expressions, 2460 "from": self._parse_from(joins=True), 2461 "where": self._parse_where(), 2462 "returning": returning or self._parse_returning(), 2463 "order": self._parse_order(), 2464 "limit": self._parse_limit(), 2465 }, 2466 ) 2467 2468 def _parse_uncache(self) -> exp.Uncache: 2469 if not self._match(TokenType.TABLE): 2470 self.raise_error("Expecting TABLE after UNCACHE") 2471 2472 return self.expression( 2473 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2474 ) 2475 2476 def _parse_cache(self) -> exp.Cache: 2477 lazy = self._match_text_seq("LAZY") 2478 self._match(TokenType.TABLE) 2479 table = self._parse_table(schema=True) 2480 2481 options = [] 2482 if self._match_text_seq("OPTIONS"): 2483 self._match_l_paren() 2484 k = self._parse_string() 2485 self._match(TokenType.EQ) 2486 v = self._parse_string() 2487 options = [k, v] 2488 self._match_r_paren() 2489 2490 self._match(TokenType.ALIAS) 2491 return self.expression( 2492 exp.Cache, 2493 this=table, 2494 lazy=lazy, 2495 options=options, 2496 expression=self._parse_select(nested=True), 2497 ) 2498 2499 def _parse_partition(self) -> t.Optional[exp.Partition]: 2500 if not self._match(TokenType.PARTITION): 2501 return None 2502 2503 return self.expression( 2504 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2505 ) 2506 2507 def _parse_value(self) -> t.Optional[exp.Tuple]: 2508 if self._match(TokenType.L_PAREN): 2509 expressions = self._parse_csv(self._parse_expression) 2510 self._match_r_paren() 2511 return self.expression(exp.Tuple, expressions=expressions) 2512 2513 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
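# A sketch of that case through the generic dialect (the SQL string is an
# illustrative assumption): each bare expression becomes a one-column row.
#
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> values = sqlglot.parse_one("SELECT * FROM (VALUES 1, 2) AS t(a)").find(exp.Values)
#   >>> len(values.expressions)  # two single-element Tuple rows
#   2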
2514 expression = self._parse_expression() 2515 if expression: 2516 return self.expression(exp.Tuple, expressions=[expression]) 2517 return None 2518 2519 def _parse_projections(self) -> t.List[exp.Expression]: 2520 return self._parse_expressions() 2521 2522 def _parse_select( 2523 self, 2524 nested: bool = False, 2525 table: bool = False, 2526 parse_subquery_alias: bool = True, 2527 parse_set_operation: bool = True, 2528 ) -> t.Optional[exp.Expression]: 2529 cte = self._parse_with() 2530 2531 if cte: 2532 this = self._parse_statement() 2533 2534 if not this: 2535 self.raise_error("Failed to parse any statement following CTE") 2536 return cte 2537 2538 if "with" in this.arg_types: 2539 this.set("with", cte) 2540 else: 2541 self.raise_error(f"{this.key} does not support CTE") 2542 this = cte 2543 2544 return this 2545 2546 # duckdb supports leading with FROM x 2547 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2548 2549 if self._match(TokenType.SELECT): 2550 comments = self._prev_comments 2551 2552 hint = self._parse_hint() 2553 all_ = self._match(TokenType.ALL) 2554 distinct = self._match_set(self.DISTINCT_TOKENS) 2555 2556 kind = ( 2557 self._match(TokenType.ALIAS) 2558 and self._match_texts(("STRUCT", "VALUE")) 2559 and self._prev.text.upper() 2560 ) 2561 2562 if distinct: 2563 distinct = self.expression( 2564 exp.Distinct, 2565 on=self._parse_value() if self._match(TokenType.ON) else None, 2566 ) 2567 2568 if all_ and distinct: 2569 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2570 2571 limit = self._parse_limit(top=True) 2572 projections = self._parse_projections() 2573 2574 this = self.expression( 2575 exp.Select, 2576 kind=kind, 2577 hint=hint, 2578 distinct=distinct, 2579 expressions=projections, 2580 limit=limit, 2581 ) 2582 this.comments = comments 2583 2584 into = self._parse_into() 2585 if into: 2586 this.set("into", into) 2587 2588 if not from_: 2589 from_ = self._parse_from() 2590 2591 if from_: 2592 this.set("from", from_) 2593 2594 this = self._parse_query_modifiers(this) 2595 elif (table or nested) and self._match(TokenType.L_PAREN): 2596 if self._match(TokenType.PIVOT): 2597 this = self._parse_simplified_pivot() 2598 elif self._match(TokenType.FROM): 2599 this = exp.select("*").from_( 2600 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2601 ) 2602 else: 2603 this = ( 2604 self._parse_table() 2605 if table 2606 else self._parse_select(nested=True, parse_set_operation=False) 2607 ) 2608 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2609 2610 self._match_r_paren() 2611 2612 # We return early here so that the UNION isn't attached to the subquery by the 2613 # following call to _parse_set_operations, but instead becomes the parent node 2614 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2615 elif self._match(TokenType.VALUES, advance=False): 2616 this = self._parse_derived_table_values() 2617 elif from_: 2618 this = exp.select("*").from_(from_.this, copy=False) 2619 else: 2620 this = None 2621 2622 if parse_set_operation: 2623 return self._parse_set_operations(this) 2624 return this 2625 2626 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2627 if not skip_with_token and not self._match(TokenType.WITH): 2628 return None 2629 2630 comments = self._prev_comments 2631 recursive = self._match(TokenType.RECURSIVE) 2632 2633 expressions = [] 2634 while True: 2635 expressions.append(self._parse_cte()) 2636 2637 if not 
self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2638 break 2639 else: 2640 self._match(TokenType.WITH) 2641 2642 return self.expression( 2643 exp.With, comments=comments, expressions=expressions, recursive=recursive 2644 ) 2645 2646 def _parse_cte(self) -> exp.CTE: 2647 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2648 if not alias or not alias.this: 2649 self.raise_error("Expected CTE to have alias") 2650 2651 self._match(TokenType.ALIAS) 2652 2653 if self._match_text_seq("NOT", "MATERIALIZED"): 2654 materialized = False 2655 elif self._match_text_seq("MATERIALIZED"): 2656 materialized = True 2657 else: 2658 materialized = None 2659 2660 return self.expression( 2661 exp.CTE, 2662 this=self._parse_wrapped(self._parse_statement), 2663 alias=alias, 2664 materialized=materialized, 2665 ) 2666 2667 def _parse_table_alias( 2668 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2669 ) -> t.Optional[exp.TableAlias]: 2670 any_token = self._match(TokenType.ALIAS) 2671 alias = ( 2672 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2673 or self._parse_string_as_identifier() 2674 ) 2675 2676 index = self._index 2677 if self._match(TokenType.L_PAREN): 2678 columns = self._parse_csv(self._parse_function_parameter) 2679 self._match_r_paren() if columns else self._retreat(index) 2680 else: 2681 columns = None 2682 2683 if not alias and not columns: 2684 return None 2685 2686 return self.expression(exp.TableAlias, this=alias, columns=columns) 2687 2688 def _parse_subquery( 2689 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2690 ) -> t.Optional[exp.Subquery]: 2691 if not this: 2692 return None 2693 2694 return self.expression( 2695 exp.Subquery, 2696 this=this, 2697 pivots=self._parse_pivots(), 2698 alias=self._parse_table_alias() if parse_alias else None, 2699 ) 2700 2701 def _implicit_unnests_to_explicit(self, this: E) -> E: 2702 from sqlglot.optimizer.normalize_identifiers import ( 2703 normalize_identifiers as _norm, 2704 ) 2705 2706 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2707 for i, join in enumerate(this.args.get("joins") or []): 2708 table = join.this 2709 normalized_table = table.copy() 2710 normalized_table.meta["maybe_column"] = True 2711 normalized_table = _norm(normalized_table, dialect=self.dialect) 2712 2713 if isinstance(table, exp.Table) and not join.args.get("on"): 2714 if normalized_table.parts[0].name in refs: 2715 table_as_column = table.to_column() 2716 unnest = exp.Unnest(expressions=[table_as_column]) 2717 2718 # Table.to_column creates a parent Alias node that we want to convert to 2719 # a TableAlias and attach to the Unnest, so it matches the parser's output 2720 if isinstance(table.args.get("alias"), exp.TableAlias): 2721 table_as_column.replace(table_as_column.this) 2722 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2723 2724 table.replace(unnest) 2725 2726 refs.add(normalized_table.alias_or_name) 2727 2728 return this 2729 2730 def _parse_query_modifiers( 2731 self, this: t.Optional[exp.Expression] 2732 ) -> t.Optional[exp.Expression]: 2733 if isinstance(this, (exp.Query, exp.Table)): 2734 for join in self._parse_joins(): 2735 this.append("joins", join) 2736 for lateral in iter(self._parse_lateral, None): 2737 this.append("laterals", lateral) 2738 2739 while True: 2740 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2741 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2742 key, expression 
= parser(self) 2743 2744 if expression: 2745 this.set(key, expression) 2746 if key == "limit": 2747 offset = expression.args.pop("offset", None) 2748 2749 if offset: 2750 offset = exp.Offset(expression=offset) 2751 this.set("offset", offset) 2752 2753 limit_by_expressions = expression.expressions 2754 expression.set("expressions", None) 2755 offset.set("expressions", limit_by_expressions) 2756 continue 2757 break 2758 2759 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2760 this = self._implicit_unnests_to_explicit(this) 2761 2762 return this 2763 2764 def _parse_hint(self) -> t.Optional[exp.Hint]: 2765 if self._match(TokenType.HINT): 2766 hints = [] 2767 for hint in iter( 2768 lambda: self._parse_csv( 2769 lambda: self._parse_function() or self._parse_var(upper=True) 2770 ), 2771 [], 2772 ): 2773 hints.extend(hint) 2774 2775 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2776 self.raise_error("Expected */ after HINT") 2777 2778 return self.expression(exp.Hint, expressions=hints) 2779 2780 return None 2781 2782 def _parse_into(self) -> t.Optional[exp.Into]: 2783 if not self._match(TokenType.INTO): 2784 return None 2785 2786 temp = self._match(TokenType.TEMPORARY) 2787 unlogged = self._match_text_seq("UNLOGGED") 2788 self._match(TokenType.TABLE) 2789 2790 return self.expression( 2791 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2792 ) 2793 2794 def _parse_from( 2795 self, joins: bool = False, skip_from_token: bool = False 2796 ) -> t.Optional[exp.From]: 2797 if not skip_from_token and not self._match(TokenType.FROM): 2798 return None 2799 2800 return self.expression( 2801 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2802 ) 2803 2804 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2805 return self.expression( 2806 exp.MatchRecognizeMeasure, 2807 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2808 this=self._parse_expression(), 2809 ) 2810 2811 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2812 if not self._match(TokenType.MATCH_RECOGNIZE): 2813 return None 2814 2815 self._match_l_paren() 2816 2817 partition = self._parse_partition_by() 2818 order = self._parse_order() 2819 2820 measures = ( 2821 self._parse_csv(self._parse_match_recognize_measure) 2822 if self._match_text_seq("MEASURES") 2823 else None 2824 ) 2825 2826 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2827 rows = exp.var("ONE ROW PER MATCH") 2828 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2829 text = "ALL ROWS PER MATCH" 2830 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2831 text += " SHOW EMPTY MATCHES" 2832 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2833 text += " OMIT EMPTY MATCHES" 2834 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2835 text += " WITH UNMATCHED ROWS" 2836 rows = exp.var(text) 2837 else: 2838 rows = None 2839 2840 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2841 text = "AFTER MATCH SKIP" 2842 if self._match_text_seq("PAST", "LAST", "ROW"): 2843 text += " PAST LAST ROW" 2844 elif self._match_text_seq("TO", "NEXT", "ROW"): 2845 text += " TO NEXT ROW" 2846 elif self._match_text_seq("TO", "FIRST"): 2847 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2848 elif self._match_text_seq("TO", "LAST"): 2849 text += f" TO LAST {self._advance_any().text}" # type: ignore 2850 after = exp.var(text) 2851 else: 2852 after = None 2853 2854 if self._match_text_seq("PATTERN"): 
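# The pattern itself is regex-like and is not parsed into an AST: the loop
# below only tracks parenthesis depth so that the raw SQL between the balanced
# parentheses can be captured verbatim via exp.var(self._find_sql(start, end)).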
2855 self._match_l_paren() 2856 2857 if not self._curr: 2858 self.raise_error("Expecting )", self._curr) 2859 2860 paren = 1 2861 start = self._curr 2862 2863 while self._curr and paren > 0: 2864 if self._curr.token_type == TokenType.L_PAREN: 2865 paren += 1 2866 if self._curr.token_type == TokenType.R_PAREN: 2867 paren -= 1 2868 2869 end = self._prev 2870 self._advance() 2871 2872 if paren > 0: 2873 self.raise_error("Expecting )", self._curr) 2874 2875 pattern = exp.var(self._find_sql(start, end)) 2876 else: 2877 pattern = None 2878 2879 define = ( 2880 self._parse_csv(self._parse_name_as_expression) 2881 if self._match_text_seq("DEFINE") 2882 else None 2883 ) 2884 2885 self._match_r_paren() 2886 2887 return self.expression( 2888 exp.MatchRecognize, 2889 partition_by=partition, 2890 order=order, 2891 measures=measures, 2892 rows=rows, 2893 after=after, 2894 pattern=pattern, 2895 define=define, 2896 alias=self._parse_table_alias(), 2897 ) 2898 2899 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2900 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2901 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2902 cross_apply = False 2903 2904 if cross_apply is not None: 2905 this = self._parse_select(table=True) 2906 view = None 2907 outer = None 2908 elif self._match(TokenType.LATERAL): 2909 this = self._parse_select(table=True) 2910 view = self._match(TokenType.VIEW) 2911 outer = self._match(TokenType.OUTER) 2912 else: 2913 return None 2914 2915 if not this: 2916 this = ( 2917 self._parse_unnest() 2918 or self._parse_function() 2919 or self._parse_id_var(any_token=False) 2920 ) 2921 2922 while self._match(TokenType.DOT): 2923 this = exp.Dot( 2924 this=this, 2925 expression=self._parse_function() or self._parse_id_var(any_token=False), 2926 ) 2927 2928 if view: 2929 table = self._parse_id_var(any_token=False) 2930 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2931 table_alias: t.Optional[exp.TableAlias] = self.expression( 2932 exp.TableAlias, this=table, columns=columns 2933 ) 2934 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2935 # We move the alias from the lateral's child node to the lateral itself 2936 table_alias = this.args["alias"].pop() 2937 else: 2938 table_alias = self._parse_table_alias() 2939 2940 return self.expression( 2941 exp.Lateral, 2942 this=this, 2943 view=view, 2944 outer=outer, 2945 alias=table_alias, 2946 cross_apply=cross_apply, 2947 ) 2948 2949 def _parse_join_parts( 2950 self, 2951 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2952 return ( 2953 self._match_set(self.JOIN_METHODS) and self._prev, 2954 self._match_set(self.JOIN_SIDES) and self._prev, 2955 self._match_set(self.JOIN_KINDS) and self._prev, 2956 ) 2957 2958 def _parse_join( 2959 self, skip_join_token: bool = False, parse_bracket: bool = False 2960 ) -> t.Optional[exp.Join]: 2961 if self._match(TokenType.COMMA): 2962 return self.expression(exp.Join, this=self._parse_table()) 2963 2964 index = self._index 2965 method, side, kind = self._parse_join_parts() 2966 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2967 join = self._match(TokenType.JOIN) 2968 2969 if not skip_join_token and not join: 2970 self._retreat(index) 2971 kind = None 2972 method = None 2973 side = None 2974 2975 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2976 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2977 2978 if not skip_join_token 
and not join and not outer_apply and not cross_apply: 2979 return None 2980 2981 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2982 2983 if method: 2984 kwargs["method"] = method.text 2985 if side: 2986 kwargs["side"] = side.text 2987 if kind: 2988 kwargs["kind"] = kind.text 2989 if hint: 2990 kwargs["hint"] = hint 2991 2992 if self._match(TokenType.MATCH_CONDITION): 2993 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 2994 2995 if self._match(TokenType.ON): 2996 kwargs["on"] = self._parse_conjunction() 2997 elif self._match(TokenType.USING): 2998 kwargs["using"] = self._parse_wrapped_id_vars() 2999 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3000 kind and kind.token_type == TokenType.CROSS 3001 ): 3002 index = self._index 3003 joins: t.Optional[list] = list(self._parse_joins()) 3004 3005 if joins and self._match(TokenType.ON): 3006 kwargs["on"] = self._parse_conjunction() 3007 elif joins and self._match(TokenType.USING): 3008 kwargs["using"] = self._parse_wrapped_id_vars() 3009 else: 3010 joins = None 3011 self._retreat(index) 3012 3013 kwargs["this"].set("joins", joins if joins else None) 3014 3015 comments = [c for token in (method, side, kind) if token for c in token.comments] 3016 return self.expression(exp.Join, comments=comments, **kwargs) 3017 3018 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3019 this = self._parse_conjunction() 3020 3021 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3022 return this 3023 3024 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3025 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3026 3027 return this 3028 3029 def _parse_index_params(self) -> exp.IndexParameters: 3030 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3031 3032 if self._match(TokenType.L_PAREN, advance=False): 3033 columns = self._parse_wrapped_csv(self._parse_with_operator) 3034 else: 3035 columns = None 3036 3037 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3038 partition_by = self._parse_partition_by() 3039 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3040 tablespace = ( 3041 self._parse_var(any_token=True) 3042 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3043 else None 3044 ) 3045 where = self._parse_where() 3046 3047 return self.expression( 3048 exp.IndexParameters, 3049 using=using, 3050 columns=columns, 3051 include=include, 3052 partition_by=partition_by, 3053 where=where, 3054 with_storage=with_storage, 3055 tablespace=tablespace, 3056 ) 3057 3058 def _parse_index( 3059 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3060 ) -> t.Optional[exp.Index]: 3061 if index or anonymous: 3062 unique = None 3063 primary = None 3064 amp = None 3065 3066 self._match(TokenType.ON) 3067 self._match(TokenType.TABLE) # hive 3068 table = self._parse_table_parts(schema=True) 3069 else: 3070 unique = self._match(TokenType.UNIQUE) 3071 primary = self._match_text_seq("PRIMARY") 3072 amp = self._match_text_seq("AMP") 3073 3074 if not self._match(TokenType.INDEX): 3075 return None 3076 3077 index = self._parse_id_var() 3078 table = None 3079 3080 params = self._parse_index_params() 3081 3082 return self.expression( 3083 exp.Index, 3084 this=index, 3085 table=table, 3086 unique=unique, 3087 primary=primary, 3088 amp=amp, 3089 params=params, 3090 ) 3091
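# A sketch of inputs that exercise the two table-hint branches below (the SQL
# strings are illustrative assumptions; "tsql" and "mysql" are sqlglot dialect
# names):
#
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT * FROM t WITH (NOLOCK)", read="tsql")    # -> exp.WithTableHint
#   >>> sqlglot.parse_one("SELECT * FROM t USE INDEX (i1)", read="mysql")  # -> exp.IndexTableHint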
3092 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3093 hints: t.List[exp.Expression] = [] 3094 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3095 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3096 hints.append( 3097 self.expression( 3098 exp.WithTableHint, 3099 expressions=self._parse_csv( 3100 lambda: self._parse_function() or self._parse_var(any_token=True) 3101 ), 3102 ) 3103 ) 3104 self._match_r_paren() 3105 else: 3106 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3107 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3108 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3109 3110 self._match_texts(("INDEX", "KEY")) 3111 if self._match(TokenType.FOR): 3112 hint.set("target", self._advance_any() and self._prev.text.upper()) 3113 3114 hint.set("expressions", self._parse_wrapped_id_vars()) 3115 hints.append(hint) 3116 3117 return hints or None 3118 3119 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3120 return ( 3121 (not schema and self._parse_function(optional_parens=False)) 3122 or self._parse_id_var(any_token=False) 3123 or self._parse_string_as_identifier() 3124 or self._parse_placeholder() 3125 ) 3126 3127 def _parse_table_parts( 3128 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3129 ) -> exp.Table: 3130 catalog = None 3131 db = None 3132 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3133 3134 while self._match(TokenType.DOT): 3135 if catalog: 3136 # This allows nesting the table in arbitrarily many dot expressions if needed 3137 table = self.expression( 3138 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3139 ) 3140 else: 3141 catalog = db 3142 db = table 3143 # "" is used for the tsql FROM a..b case 3144 table = self._parse_table_part(schema=schema) or "" 3145 3146 if ( 3147 wildcard 3148 and self._is_connected() 3149 and (isinstance(table, exp.Identifier) or not table) 3150 and self._match(TokenType.STAR) 3151 ): 3152 if isinstance(table, exp.Identifier): 3153 table.args["this"] += "*" 3154 else: 3155 table = exp.Identifier(this="*") 3156 3157 # We bubble up comments from the Identifier to the Table 3158 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3159 3160 if is_db_reference: 3161 catalog = db 3162 db = table 3163 table = None 3164 3165 if not table and not is_db_reference: 3166 self.raise_error(f"Expected table name but got {self._curr}") 3167 if not db and is_db_reference: 3168 self.raise_error(f"Expected database name but got {self._curr}") 3169 3170 return self.expression( 3171 exp.Table, 3172 comments=comments, 3173 this=table, 3174 db=db, 3175 catalog=catalog, 3176 pivots=self._parse_pivots(), 3177 ) 3178 3179 def _parse_table( 3180 self, 3181 schema: bool = False, 3182 joins: bool = False, 3183 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3184 parse_bracket: bool = False, 3185 is_db_reference: bool = False, 3186 parse_partition: bool = False, 3187 ) -> t.Optional[exp.Expression]: 3188 lateral = self._parse_lateral() 3189 if lateral: 3190 return lateral 3191 3192 unnest = self._parse_unnest() 3193 if unnest: 3194 return unnest 3195 3196 values = self._parse_derived_table_values() 3197 if values: 3198 return values 3199 3200 subquery = self._parse_select(table=True) 3201 if subquery: 3202 if not subquery.args.get("pivots"): 3203 subquery.set("pivots", self._parse_pivots()) 3204 return subquery 3205 3206 bracket = parse_bracket and
self._parse_bracket(None) 3207 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3208 3209 only = self._match(TokenType.ONLY) 3210 3211 this = t.cast( 3212 exp.Expression, 3213 bracket 3214 or self._parse_bracket( 3215 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3216 ), 3217 ) 3218 3219 if only: 3220 this.set("only", only) 3221 3222 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3223 self._match_text_seq("*") 3224 3225 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3226 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3227 this.set("partition", self._parse_partition()) 3228 3229 if schema: 3230 return self._parse_schema(this=this) 3231 3232 version = self._parse_version() 3233 3234 if version: 3235 this.set("version", version) 3236 3237 if self.dialect.ALIAS_POST_TABLESAMPLE: 3238 table_sample = self._parse_table_sample() 3239 3240 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3241 if alias: 3242 this.set("alias", alias) 3243 3244 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3245 return self.expression( 3246 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3247 ) 3248 3249 this.set("hints", self._parse_table_hints()) 3250 3251 if not this.args.get("pivots"): 3252 this.set("pivots", self._parse_pivots()) 3253 3254 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3255 table_sample = self._parse_table_sample() 3256 3257 if table_sample: 3258 table_sample.set("this", this) 3259 this = table_sample 3260 3261 if joins: 3262 for join in self._parse_joins(): 3263 this.append("joins", join) 3264 3265 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3266 this.set("ordinality", True) 3267 this.set("alias", self._parse_table_alias()) 3268 3269 return this 3270 3271 def _parse_version(self) -> t.Optional[exp.Version]: 3272 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3273 this = "TIMESTAMP" 3274 elif self._match(TokenType.VERSION_SNAPSHOT): 3275 this = "VERSION" 3276 else: 3277 return None 3278 3279 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3280 kind = self._prev.text.upper() 3281 start = self._parse_bitwise() 3282 self._match_texts(("TO", "AND")) 3283 end = self._parse_bitwise() 3284 expression: t.Optional[exp.Expression] = self.expression( 3285 exp.Tuple, expressions=[start, end] 3286 ) 3287 elif self._match_text_seq("CONTAINED", "IN"): 3288 kind = "CONTAINED IN" 3289 expression = self.expression( 3290 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3291 ) 3292 elif self._match(TokenType.ALL): 3293 kind = "ALL" 3294 expression = None 3295 else: 3296 self._match_text_seq("AS", "OF") 3297 kind = "AS OF" 3298 expression = self._parse_type() 3299 3300 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3301 3302 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3303 if not self._match(TokenType.UNNEST): 3304 return None 3305 3306 expressions = self._parse_wrapped_csv(self._parse_equality) 3307 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3308 3309 alias = self._parse_table_alias() if with_alias else None 3310 3311 if alias: 3312 if self.dialect.UNNEST_COLUMN_ONLY: 3313 if alias.args.get("columns"): 3314 self.raise_error("Unexpected extra column alias in unnest.") 3315 3316 alias.set("columns", [alias.this]) 3317 alias.set("this", None) 3318 3319 columns = 
alias.args.get("columns") or [] 3320 if offset and len(expressions) < len(columns): 3321 offset = columns.pop() 3322 3323 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3324 self._match(TokenType.ALIAS) 3325 offset = self._parse_id_var( 3326 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3327 ) or exp.to_identifier("offset") 3328 3329 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3330 3331 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3332 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3333 if not is_derived and not self._match_text_seq("VALUES"): 3334 return None 3335 3336 expressions = self._parse_csv(self._parse_value) 3337 alias = self._parse_table_alias() 3338 3339 if is_derived: 3340 self._match_r_paren() 3341 3342 return self.expression( 3343 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3344 ) 3345 3346 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3347 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3348 as_modifier and self._match_text_seq("USING", "SAMPLE") 3349 ): 3350 return None 3351 3352 bucket_numerator = None 3353 bucket_denominator = None 3354 bucket_field = None 3355 percent = None 3356 size = None 3357 seed = None 3358 3359 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3360 matched_l_paren = self._match(TokenType.L_PAREN) 3361 3362 if self.TABLESAMPLE_CSV: 3363 num = None 3364 expressions = self._parse_csv(self._parse_primary) 3365 else: 3366 expressions = None 3367 num = ( 3368 self._parse_factor() 3369 if self._match(TokenType.NUMBER, advance=False) 3370 else self._parse_primary() or self._parse_placeholder() 3371 ) 3372 3373 if self._match_text_seq("BUCKET"): 3374 bucket_numerator = self._parse_number() 3375 self._match_text_seq("OUT", "OF") 3376 bucket_denominator = self._parse_number() 3377 self._match(TokenType.ON) 3378 bucket_field = self._parse_field() 3379 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3380 percent = num 3381 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3382 size = num 3383 else: 3384 percent = num 3385 3386 if matched_l_paren: 3387 self._match_r_paren() 3388 3389 if self._match(TokenType.L_PAREN): 3390 method = self._parse_var(upper=True) 3391 seed = self._match(TokenType.COMMA) and self._parse_number() 3392 self._match_r_paren() 3393 elif self._match_texts(("SEED", "REPEATABLE")): 3394 seed = self._parse_wrapped(self._parse_number) 3395 3396 return self.expression( 3397 exp.TableSample, 3398 expressions=expressions, 3399 method=method, 3400 bucket_numerator=bucket_numerator, 3401 bucket_denominator=bucket_denominator, 3402 bucket_field=bucket_field, 3403 percent=percent, 3404 size=size, 3405 seed=seed, 3406 ) 3407 3408 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3409 return list(iter(self._parse_pivot, None)) or None 3410 3411 def _parse_joins(self) -> t.Iterator[exp.Join]: 3412 return iter(self._parse_join, None) 3413 3414 # https://duckdb.org/docs/sql/statements/pivot 3415 def _parse_simplified_pivot(self) -> exp.Pivot: 3416 def _parse_on() -> t.Optional[exp.Expression]: 3417 this = self._parse_bitwise() 3418 return self._parse_in(this) if self._match(TokenType.IN) else this 3419 3420 this = self._parse_table() 3421 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3422 using = self._match(TokenType.USING) and self._parse_csv( 3423
lambda: self._parse_alias(self._parse_function()) 3424 ) 3425 group = self._parse_group() 3426 return self.expression( 3427 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3428 ) 3429 3430 def _parse_pivot_in(self) -> exp.In: 3431 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3432 this = self._parse_conjunction() 3433 3434 self._match(TokenType.ALIAS) 3435 alias = self._parse_field() 3436 if alias: 3437 return self.expression(exp.PivotAlias, this=this, alias=alias) 3438 3439 return this 3440 3441 value = self._parse_column() 3442 3443 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3444 self.raise_error("Expecting IN (") 3445 3446 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3447 3448 self._match_r_paren() 3449 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3450 3451 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3452 index = self._index 3453 include_nulls = None 3454 3455 if self._match(TokenType.PIVOT): 3456 unpivot = False 3457 elif self._match(TokenType.UNPIVOT): 3458 unpivot = True 3459 3460 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3461 if self._match_text_seq("INCLUDE", "NULLS"): 3462 include_nulls = True 3463 elif self._match_text_seq("EXCLUDE", "NULLS"): 3464 include_nulls = False 3465 else: 3466 return None 3467 3468 expressions = [] 3469 3470 if not self._match(TokenType.L_PAREN): 3471 self._retreat(index) 3472 return None 3473 3474 if unpivot: 3475 expressions = self._parse_csv(self._parse_column) 3476 else: 3477 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3478 3479 if not expressions: 3480 self.raise_error("Failed to parse PIVOT's aggregation list") 3481 3482 if not self._match(TokenType.FOR): 3483 self.raise_error("Expecting FOR") 3484 3485 field = self._parse_pivot_in() 3486 3487 self._match_r_paren() 3488 3489 pivot = self.expression( 3490 exp.Pivot, 3491 expressions=expressions, 3492 field=field, 3493 unpivot=unpivot, 3494 include_nulls=include_nulls, 3495 ) 3496 3497 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3498 pivot.set("alias", self._parse_table_alias()) 3499 3500 if not unpivot: 3501 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3502 3503 columns: t.List[exp.Expression] = [] 3504 for fld in pivot.args["field"].expressions: 3505 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3506 for name in names: 3507 if self.PREFIXED_PIVOT_COLUMNS: 3508 name = f"{name}_{field_name}" if name else field_name 3509 else: 3510 name = f"{field_name}_{name}" if name else field_name 3511 3512 columns.append(exp.to_identifier(name)) 3513 3514 pivot.set("columns", columns) 3515 3516 return pivot 3517 3518 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3519 return [agg.alias for agg in aggregations] 3520 3521 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3522 if not skip_where_token and not self._match(TokenType.PREWHERE): 3523 return None 3524 3525 return self.expression( 3526 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3527 ) 3528 3529 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3530 if not skip_where_token and not self._match(TokenType.WHERE): 3531 return None 3532 3533 return self.expression( 3534 exp.Where, comments=self._prev_comments, 
this=self._parse_conjunction() 3535 ) 3536 3537 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3538 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3539 return None 3540 3541 elements: t.Dict[str, t.Any] = defaultdict(list) 3542 3543 if self._match(TokenType.ALL): 3544 elements["all"] = True 3545 elif self._match(TokenType.DISTINCT): 3546 elements["all"] = False 3547 3548 while True: 3549 expressions = self._parse_csv(self._parse_conjunction) 3550 if expressions: 3551 elements["expressions"].extend(expressions) 3552 3553 grouping_sets = self._parse_grouping_sets() 3554 if grouping_sets: 3555 elements["grouping_sets"].extend(grouping_sets) 3556 3557 rollup = None 3558 cube = None 3559 totals = None 3560 3561 index = self._index 3562 with_ = self._match(TokenType.WITH) 3563 if self._match(TokenType.ROLLUP): 3564 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3565 elements["rollup"].extend(ensure_list(rollup)) 3566 3567 if self._match(TokenType.CUBE): 3568 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3569 elements["cube"].extend(ensure_list(cube)) 3570 3571 if self._match_text_seq("TOTALS"): 3572 totals = True 3573 elements["totals"] = True # type: ignore 3574 3575 if not (grouping_sets or rollup or cube or totals): 3576 if with_: 3577 self._retreat(index) 3578 break 3579 3580 return self.expression(exp.Group, **elements) # type: ignore 3581 3582 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3583 if not self._match(TokenType.GROUPING_SETS): 3584 return None 3585 3586 return self._parse_wrapped_csv(self._parse_grouping_set) 3587 3588 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3589 if self._match(TokenType.L_PAREN): 3590 grouping_set = self._parse_csv(self._parse_column) 3591 self._match_r_paren() 3592 return self.expression(exp.Tuple, expressions=grouping_set) 3593 3594 return self._parse_column() 3595 3596 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3597 if not skip_having_token and not self._match(TokenType.HAVING): 3598 return None 3599 return self.expression(exp.Having, this=self._parse_conjunction()) 3600 3601 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3602 if not self._match(TokenType.QUALIFY): 3603 return None 3604 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3605 3606 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3607 if skip_start_token: 3608 start = None 3609 elif self._match(TokenType.START_WITH): 3610 start = self._parse_conjunction() 3611 else: 3612 return None 3613 3614 self._match(TokenType.CONNECT_BY) 3615 nocycle = self._match_text_seq("NOCYCLE") 3616 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3617 exp.Prior, this=self._parse_bitwise() 3618 ) 3619 connect = self._parse_conjunction() 3620 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3621 3622 if not start and self._match(TokenType.START_WITH): 3623 start = self._parse_conjunction() 3624 3625 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3626 3627 def _parse_name_as_expression(self) -> exp.Alias: 3628 return self.expression( 3629 exp.Alias, 3630 alias=self._parse_id_var(any_token=True), 3631 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3632 ) 3633 3634 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3635 if self._match_text_seq("INTERPOLATE"): 3636 return 
self._parse_wrapped_csv(self._parse_name_as_expression) 3637 return None 3638 3639 def _parse_order( 3640 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3641 ) -> t.Optional[exp.Expression]: 3642 siblings = None 3643 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3644 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3645 return this 3646 3647 siblings = True 3648 3649 return self.expression( 3650 exp.Order, 3651 this=this, 3652 expressions=self._parse_csv(self._parse_ordered), 3653 interpolate=self._parse_interpolate(), 3654 siblings=siblings, 3655 ) 3656 3657 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3658 if not self._match(token): 3659 return None 3660 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3661 3662 def _parse_ordered( 3663 self, parse_method: t.Optional[t.Callable] = None 3664 ) -> t.Optional[exp.Ordered]: 3665 this = parse_method() if parse_method else self._parse_conjunction() 3666 if not this: 3667 return None 3668 3669 asc = self._match(TokenType.ASC) 3670 desc = self._match(TokenType.DESC) or (asc and False) 3671 3672 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3673 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3674 3675 nulls_first = is_nulls_first or False 3676 explicitly_null_ordered = is_nulls_first or is_nulls_last 3677 3678 if ( 3679 not explicitly_null_ordered 3680 and ( 3681 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3682 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3683 ) 3684 and self.dialect.NULL_ORDERING != "nulls_are_last" 3685 ): 3686 nulls_first = True 3687 3688 if self._match_text_seq("WITH", "FILL"): 3689 with_fill = self.expression( 3690 exp.WithFill, 3691 **{ # type: ignore 3692 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3693 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3694 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3695 }, 3696 ) 3697 else: 3698 with_fill = None 3699 3700 return self.expression( 3701 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3702 ) 3703 3704 def _parse_limit( 3705 self, 3706 this: t.Optional[exp.Expression] = None, 3707 top: bool = False, 3708 skip_limit_token: bool = False, 3709 ) -> t.Optional[exp.Expression]: 3710 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3711 comments = self._prev_comments 3712 if top: 3713 limit_paren = self._match(TokenType.L_PAREN) 3714 expression = self._parse_term() if limit_paren else self._parse_number() 3715 3716 if limit_paren: 3717 self._match_r_paren() 3718 else: 3719 expression = self._parse_term() 3720 3721 if self._match(TokenType.COMMA): 3722 offset = expression 3723 expression = self._parse_term() 3724 else: 3725 offset = None 3726 3727 limit_exp = self.expression( 3728 exp.Limit, 3729 this=this, 3730 expression=expression, 3731 offset=offset, 3732 comments=comments, 3733 expressions=self._parse_limit_by(), 3734 ) 3735 3736 return limit_exp 3737 3738 if self._match(TokenType.FETCH): 3739 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3740 direction = self._prev.text.upper() if direction else "FIRST" 3741 3742 count = self._parse_field(tokens=self.FETCH_TOKENS) 3743 percent = self._match(TokenType.PERCENT) 3744 3745 self._match_set((TokenType.ROW, TokenType.ROWS)) 3746 3747 only = self._match_text_seq("ONLY") 3748 with_ties = self._match_text_seq("WITH", "TIES") 3749 3750 if 
only and with_ties: 3751 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3752 3753 return self.expression( 3754 exp.Fetch, 3755 direction=direction, 3756 count=count, 3757 percent=percent, 3758 with_ties=with_ties, 3759 ) 3760 3761 return this 3762 3763 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3764 if not self._match(TokenType.OFFSET): 3765 return this 3766 3767 count = self._parse_term() 3768 self._match_set((TokenType.ROW, TokenType.ROWS)) 3769 3770 return self.expression( 3771 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3772 ) 3773 3774 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3775 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3776 3777 def _parse_locks(self) -> t.List[exp.Lock]: 3778 locks = [] 3779 while True: 3780 if self._match_text_seq("FOR", "UPDATE"): 3781 update = True 3782 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3783 "LOCK", "IN", "SHARE", "MODE" 3784 ): 3785 update = False 3786 else: 3787 break 3788 3789 expressions = None 3790 if self._match_text_seq("OF"): 3791 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3792 3793 wait: t.Optional[bool | exp.Expression] = None 3794 if self._match_text_seq("NOWAIT"): 3795 wait = True 3796 elif self._match_text_seq("WAIT"): 3797 wait = self._parse_primary() 3798 elif self._match_text_seq("SKIP", "LOCKED"): 3799 wait = False 3800 3801 locks.append( 3802 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3803 ) 3804 3805 return locks 3806 3807 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3808 while this and self._match_set(self.SET_OPERATIONS): 3809 token_type = self._prev.token_type 3810 3811 if token_type == TokenType.UNION: 3812 operation = exp.Union 3813 elif token_type == TokenType.EXCEPT: 3814 operation = exp.Except 3815 else: 3816 operation = exp.Intersect 3817 3818 comments = self._prev.comments 3819 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3820 by_name = self._match_text_seq("BY", "NAME") 3821 expression = self._parse_select(nested=True, parse_set_operation=False) 3822 3823 this = self.expression( 3824 operation, 3825 comments=comments, 3826 this=this, 3827 distinct=distinct, 3828 by_name=by_name, 3829 expression=expression, 3830 ) 3831 3832 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3833 expression = this.expression 3834 3835 if expression: 3836 for arg in self.UNION_MODIFIERS: 3837 expr = expression.args.get(arg) 3838 if expr: 3839 this.set(arg, expr.pop()) 3840 3841 return this 3842 3843 def _parse_expression(self) -> t.Optional[exp.Expression]: 3844 return self._parse_alias(self._parse_conjunction()) 3845 3846 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3847 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3848 3849 def _parse_equality(self) -> t.Optional[exp.Expression]: 3850 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3851 3852 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3853 return self._parse_tokens(self._parse_range, self.COMPARISON) 3854 3855 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3856 this = this or self._parse_bitwise() 3857 negate = self._match(TokenType.NOT) 3858 3859 if self._match_set(self.RANGE_PARSERS): 3860 expression = 
self.RANGE_PARSERS[self._prev.token_type](self, this) 3861 if not expression: 3862 return this 3863 3864 this = expression 3865 elif self._match(TokenType.ISNULL): 3866 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3867 3868 # Postgres supports ISNULL and NOTNULL for conditions. 3869 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3870 if self._match(TokenType.NOTNULL): 3871 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3872 this = self.expression(exp.Not, this=this) 3873 3874 if negate: 3875 this = self.expression(exp.Not, this=this) 3876 3877 if self._match(TokenType.IS): 3878 this = self._parse_is(this) 3879 3880 return this 3881 3882 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3883 index = self._index - 1 3884 negate = self._match(TokenType.NOT) 3885 3886 if self._match_text_seq("DISTINCT", "FROM"): 3887 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3888 return self.expression(klass, this=this, expression=self._parse_bitwise()) 3889 3890 expression = self._parse_null() or self._parse_boolean() 3891 if not expression: 3892 self._retreat(index) 3893 return None 3894 3895 this = self.expression(exp.Is, this=this, expression=expression) 3896 return self.expression(exp.Not, this=this) if negate else this 3897 3898 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3899 unnest = self._parse_unnest(with_alias=False) 3900 if unnest: 3901 this = self.expression(exp.In, this=this, unnest=unnest) 3902 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3903 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3904 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3905 3906 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 3907 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 3908 else: 3909 this = self.expression(exp.In, this=this, expressions=expressions) 3910 3911 if matched_l_paren: 3912 self._match_r_paren(this) 3913 elif not self._match(TokenType.R_BRACKET, expression=this): 3914 self.raise_error("Expecting ]") 3915 else: 3916 this = self.expression(exp.In, this=this, field=self._parse_field()) 3917 3918 return this 3919 3920 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3921 low = self._parse_bitwise() 3922 self._match(TokenType.AND) 3923 high = self._parse_bitwise() 3924 return self.expression(exp.Between, this=this, low=low, high=high) 3925 3926 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3927 if not self._match(TokenType.ESCAPE): 3928 return this 3929 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3930 3931 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3932 index = self._index 3933 3934 if not self._match(TokenType.INTERVAL) and match_interval: 3935 return None 3936 3937 if self._match(TokenType.STRING, advance=False): 3938 this = self._parse_primary() 3939 else: 3940 this = self._parse_term() 3941 3942 if not this or ( 3943 isinstance(this, exp.Column) 3944 and not this.table 3945 and not this.this.quoted 3946 and this.name.upper() == "IS" 3947 ): 3948 self._retreat(index) 3949 return None 3950 3951 unit = self._parse_function() or ( 3952 not self._match(TokenType.ALIAS, advance=False) 3953 and self._parse_var(any_token=True, upper=True) 3954 ) 3955 3956 # Most dialects support, e.g., the form INTERVAL 
'5' day, thus we try to parse 3957 # each INTERVAL expression into this canonical form so it's easy to transpile 3958 if this and this.is_number: 3959 this = exp.Literal.string(this.name) 3960 elif this and this.is_string: 3961 parts = this.name.split() 3962 3963 if len(parts) == 2: 3964 if unit: 3965 # This is not actually a unit, it's something else (e.g. a "window side") 3966 unit = None 3967 self._retreat(self._index - 1) 3968 3969 this = exp.Literal.string(parts[0]) 3970 unit = self.expression(exp.Var, this=parts[1].upper()) 3971 3972 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 3973 unit = self.expression( 3974 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 3975 ) 3976 3977 return self.expression(exp.Interval, this=this, unit=unit) 3978 3979 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3980 this = self._parse_term() 3981 3982 while True: 3983 if self._match_set(self.BITWISE): 3984 this = self.expression( 3985 self.BITWISE[self._prev.token_type], 3986 this=this, 3987 expression=self._parse_term(), 3988 ) 3989 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3990 this = self.expression( 3991 exp.DPipe, 3992 this=this, 3993 expression=self._parse_term(), 3994 safe=not self.dialect.STRICT_STRING_CONCAT, 3995 ) 3996 elif self._match(TokenType.DQMARK): 3997 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3998 elif self._match_pair(TokenType.LT, TokenType.LT): 3999 this = self.expression( 4000 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4001 ) 4002 elif self._match_pair(TokenType.GT, TokenType.GT): 4003 this = self.expression( 4004 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4005 ) 4006 else: 4007 break 4008 4009 return this 4010 4011 def _parse_term(self) -> t.Optional[exp.Expression]: 4012 return self._parse_tokens(self._parse_factor, self.TERM) 4013 4014 def _parse_factor(self) -> t.Optional[exp.Expression]: 4015 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4016 this = parse_method() 4017 4018 while self._match_set(self.FACTOR): 4019 this = self.expression( 4020 self.FACTOR[self._prev.token_type], 4021 this=this, 4022 comments=self._prev_comments, 4023 expression=parse_method(), 4024 ) 4025 if isinstance(this, exp.Div): 4026 this.args["typed"] = self.dialect.TYPED_DIVISION 4027 this.args["safe"] = self.dialect.SAFE_DIVISION 4028 4029 return this 4030 4031 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4032 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4033 4034 def _parse_unary(self) -> t.Optional[exp.Expression]: 4035 if self._match_set(self.UNARY_PARSERS): 4036 return self.UNARY_PARSERS[self._prev.token_type](self) 4037 return self._parse_at_time_zone(self._parse_type()) 4038 4039 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 4040 interval = parse_interval and self._parse_interval() 4041 if interval: 4042 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 4043 while True: 4044 index = self._index 4045 self._match(TokenType.PLUS) 4046 4047 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4048 self._retreat(index) 4049 break 4050 4051 interval = self.expression( # type: ignore 4052 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4053 ) 4054 4055 return interval 4056 4057 index = self._index 4058 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4059 this = self._parse_column() 4060 4061 if data_type: 4062 if isinstance(this, exp.Literal): 4063 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4064 if parser: 4065 return parser(self, this, data_type) 4066 return self.expression(exp.Cast, this=this, to=data_type) 4067 if not data_type.expressions: 4068 self._retreat(index) 4069 return self._parse_column() 4070 return self._parse_column_ops(data_type) 4071 4072 return this and self._parse_column_ops(this) 4073 4074 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4075 this = self._parse_type() 4076 if not this: 4077 return None 4078 4079 if isinstance(this, exp.Column) and not this.table: 4080 this = exp.var(this.name.upper()) 4081 4082 return self.expression( 4083 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4084 ) 4085 4086 def _parse_types( 4087 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4088 ) -> t.Optional[exp.Expression]: 4089 index = self._index 4090 4091 prefix = self._match_text_seq("SYSUDTLIB", ".") 4092 4093 if not self._match_set(self.TYPE_TOKENS): 4094 identifier = allow_identifiers and self._parse_id_var( 4095 any_token=False, tokens=(TokenType.VAR,) 4096 ) 4097 if identifier: 4098 tokens = self.dialect.tokenize(identifier.name) 4099 4100 if len(tokens) != 1: 4101 self.raise_error("Unexpected identifier", self._prev) 4102 4103 if tokens[0].token_type in self.TYPE_TOKENS: 4104 self._prev = tokens[0] 4105 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4106 type_name = identifier.name 4107 4108 while self._match(TokenType.DOT): 4109 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4110 4111 return exp.DataType.build(type_name, udt=True) 4112 else: 4113 self._retreat(self._index - 1) 4114 return None 4115 else: 4116 return None 4117 4118 type_token = self._prev.token_type 4119 4120 if type_token == TokenType.PSEUDO_TYPE: 4121 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4122 4123 if type_token == TokenType.OBJECT_IDENTIFIER: 4124 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4125 4126 nested = type_token in self.NESTED_TYPE_TOKENS 4127 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4128 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4129 expressions = None 4130 maybe_func = False 4131 4132 if self._match(TokenType.L_PAREN): 4133 if is_struct: 4134 expressions = self._parse_csv(self._parse_struct_types) 4135 elif nested: 4136 expressions = self._parse_csv( 4137 lambda: self._parse_types( 4138 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4139 ) 4140 ) 4141 elif type_token in self.ENUM_TYPE_TOKENS: 4142 expressions = self._parse_csv(self._parse_equality) 4143 elif is_aggregate: 4144 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4145 any_token=False, tokens=(TokenType.VAR,) 4146 ) 4147 if not func_or_ident or not self._match(TokenType.COMMA): 4148 return None 4149 expressions = 
self._parse_csv( 4150 lambda: self._parse_types( 4151 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4152 ) 4153 ) 4154 expressions.insert(0, func_or_ident) 4155 else: 4156 expressions = self._parse_csv(self._parse_type_size) 4157 4158 if not expressions or not self._match(TokenType.R_PAREN): 4159 self._retreat(index) 4160 return None 4161 4162 maybe_func = True 4163 4164 this: t.Optional[exp.Expression] = None 4165 values: t.Optional[t.List[exp.Expression]] = None 4166 4167 if nested and self._match(TokenType.LT): 4168 if is_struct: 4169 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4170 else: 4171 expressions = self._parse_csv( 4172 lambda: self._parse_types( 4173 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4174 ) 4175 ) 4176 4177 if not self._match(TokenType.GT): 4178 self.raise_error("Expecting >") 4179 4180 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4181 values = self._parse_csv(self._parse_conjunction) 4182 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4183 4184 if type_token in self.TIMESTAMPS: 4185 if self._match_text_seq("WITH", "TIME", "ZONE"): 4186 maybe_func = False 4187 tz_type = ( 4188 exp.DataType.Type.TIMETZ 4189 if type_token in self.TIMES 4190 else exp.DataType.Type.TIMESTAMPTZ 4191 ) 4192 this = exp.DataType(this=tz_type, expressions=expressions) 4193 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4194 maybe_func = False 4195 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4196 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4197 maybe_func = False 4198 elif type_token == TokenType.INTERVAL: 4199 unit = self._parse_var(upper=True) 4200 if unit: 4201 if self._match_text_seq("TO"): 4202 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4203 4204 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4205 else: 4206 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4207 4208 if maybe_func and check_func: 4209 index2 = self._index 4210 peek = self._parse_string() 4211 4212 if not peek: 4213 self._retreat(index) 4214 return None 4215 4216 self._retreat(index2) 4217 4218 if not this: 4219 if self._match_text_seq("UNSIGNED"): 4220 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4221 if not unsigned_type_token: 4222 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4223 4224 type_token = unsigned_type_token or type_token 4225 4226 this = exp.DataType( 4227 this=exp.DataType.Type[type_token.value], 4228 expressions=expressions, 4229 nested=nested, 4230 values=values, 4231 prefix=prefix, 4232 ) 4233 4234 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 4235 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 4236 4237 return this 4238 4239 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4240 index = self._index 4241 this = self._parse_type(parse_interval=False) or self._parse_id_var() 4242 self._match(TokenType.COLON) 4243 column_def = self._parse_column_def(this) 4244 4245 if type_required and ( 4246 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 4247 ): 4248 self._retreat(index) 4249 return self._parse_types() 4250 4251 return column_def 4252 4253 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4254 if not 
self._match_text_seq("AT", "TIME", "ZONE"): 4255 return this 4256 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4257 4258 def _parse_column(self) -> t.Optional[exp.Expression]: 4259 this = self._parse_column_reference() 4260 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4261 4262 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4263 this = self._parse_field() 4264 if ( 4265 not this 4266 and self._match(TokenType.VALUES, advance=False) 4267 and self.VALUES_FOLLOWED_BY_PAREN 4268 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4269 ): 4270 this = self._parse_id_var() 4271 4272 if isinstance(this, exp.Identifier): 4273 # We bubble up comments from the Identifier to the Column 4274 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4275 4276 return this 4277 4278 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4279 this = self._parse_bracket(this) 4280 4281 while self._match_set(self.COLUMN_OPERATORS): 4282 op_token = self._prev.token_type 4283 op = self.COLUMN_OPERATORS.get(op_token) 4284 4285 if op_token == TokenType.DCOLON: 4286 field = self._parse_types() 4287 if not field: 4288 self.raise_error("Expected type") 4289 elif op and self._curr: 4290 field = self._parse_column_reference() 4291 else: 4292 field = self._parse_field(any_token=True, anonymous_func=True) 4293 4294 if isinstance(field, exp.Func) and this: 4295 # bigquery allows function calls like x.y.count(...) 4296 # SAFE.SUBSTR(...) 4297 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4298 this = exp.replace_tree( 4299 this, 4300 lambda n: ( 4301 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4302 if n.table 4303 else n.this 4304 ) 4305 if isinstance(n, exp.Column) 4306 else n, 4307 ) 4308 4309 if op: 4310 this = op(self, this, field) 4311 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4312 this = self.expression( 4313 exp.Column, 4314 this=field, 4315 table=this.this, 4316 db=this.args.get("table"), 4317 catalog=this.args.get("db"), 4318 ) 4319 else: 4320 this = self.expression(exp.Dot, this=this, expression=field) 4321 this = self._parse_bracket(this) 4322 return this 4323 4324 def _parse_primary(self) -> t.Optional[exp.Expression]: 4325 if self._match_set(self.PRIMARY_PARSERS): 4326 token_type = self._prev.token_type 4327 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4328 4329 if token_type == TokenType.STRING: 4330 expressions = [primary] 4331 while self._match(TokenType.STRING): 4332 expressions.append(exp.Literal.string(self._prev.text)) 4333 4334 if len(expressions) > 1: 4335 return self.expression(exp.Concat, expressions=expressions) 4336 4337 return primary 4338 4339 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4340 return exp.Literal.number(f"0.{self._prev.text}") 4341 4342 if self._match(TokenType.L_PAREN): 4343 comments = self._prev_comments 4344 query = self._parse_select() 4345 4346 if query: 4347 expressions = [query] 4348 else: 4349 expressions = self._parse_expressions() 4350 4351 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4352 4353 if not this and self._match(TokenType.R_PAREN, advance=False): 4354 this = self.expression(exp.Tuple) 4355 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4356 this = self._parse_subquery(this=this, parse_alias=False) 4357 elif isinstance(this, exp.Subquery): 4358 this = 
self._parse_subquery( 4359 this=self._parse_set_operations(this), parse_alias=False 4360 ) 4361 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4362 this = self.expression(exp.Tuple, expressions=expressions) 4363 else: 4364 this = self.expression(exp.Paren, this=this) 4365 4366 if this: 4367 this.add_comments(comments) 4368 4369 self._match_r_paren(expression=this) 4370 return this 4371 4372 return None 4373 4374 def _parse_field( 4375 self, 4376 any_token: bool = False, 4377 tokens: t.Optional[t.Collection[TokenType]] = None, 4378 anonymous_func: bool = False, 4379 ) -> t.Optional[exp.Expression]: 4380 if anonymous_func: 4381 field = ( 4382 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4383 or self._parse_primary() 4384 ) 4385 else: 4386 field = self._parse_primary() or self._parse_function( 4387 anonymous=anonymous_func, any_token=any_token 4388 ) 4389 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4390 4391 def _parse_function( 4392 self, 4393 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4394 anonymous: bool = False, 4395 optional_parens: bool = True, 4396 any_token: bool = False, 4397 ) -> t.Optional[exp.Expression]: 4398 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4399 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4400 fn_syntax = False 4401 if ( 4402 self._match(TokenType.L_BRACE, advance=False) 4403 and self._next 4404 and self._next.text.upper() == "FN" 4405 ): 4406 self._advance(2) 4407 fn_syntax = True 4408 4409 func = self._parse_function_call( 4410 functions=functions, 4411 anonymous=anonymous, 4412 optional_parens=optional_parens, 4413 any_token=any_token, 4414 ) 4415 4416 if fn_syntax: 4417 self._match(TokenType.R_BRACE) 4418 4419 return func 4420 4421 def _parse_function_call( 4422 self, 4423 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4424 anonymous: bool = False, 4425 optional_parens: bool = True, 4426 any_token: bool = False, 4427 ) -> t.Optional[exp.Expression]: 4428 if not self._curr: 4429 return None 4430 4431 comments = self._curr.comments 4432 token_type = self._curr.token_type 4433 this = self._curr.text 4434 upper = this.upper() 4435 4436 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4437 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4438 self._advance() 4439 return self._parse_window(parser(self)) 4440 4441 if not self._next or self._next.token_type != TokenType.L_PAREN: 4442 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4443 self._advance() 4444 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4445 4446 return None 4447 4448 if any_token: 4449 if token_type in self.RESERVED_TOKENS: 4450 return None 4451 elif token_type not in self.FUNC_TOKENS: 4452 return None 4453 4454 self._advance(2) 4455 4456 parser = self.FUNCTION_PARSERS.get(upper) 4457 if parser and not anonymous: 4458 this = parser(self) 4459 else: 4460 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4461 4462 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4463 this = self.expression(subquery_predicate, this=self._parse_select()) 4464 self._match_r_paren() 4465 return this 4466 4467 if functions is None: 4468 functions = self.FUNCTIONS 4469 4470 function = functions.get(upper) 4471 4472 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4473 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4474 4475 if alias: 4476 
args = self._kv_to_prop_eq(args) 4477 4478 if function and not anonymous: 4479 if "dialect" in function.__code__.co_varnames: 4480 func = function(args, dialect=self.dialect) 4481 else: 4482 func = function(args) 4483 4484 func = self.validate_expression(func, args) 4485 if not self.dialect.NORMALIZE_FUNCTIONS: 4486 func.meta["name"] = this 4487 4488 this = func 4489 else: 4490 if token_type == TokenType.IDENTIFIER: 4491 this = exp.Identifier(this=this, quoted=True) 4492 this = self.expression(exp.Anonymous, this=this, expressions=args) 4493 4494 if isinstance(this, exp.Expression): 4495 this.add_comments(comments) 4496 4497 self._match_r_paren(this) 4498 return self._parse_window(this) 4499 4500 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4501 transformed = [] 4502 4503 for e in expressions: 4504 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4505 if isinstance(e, exp.Alias): 4506 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4507 4508 if not isinstance(e, exp.PropertyEQ): 4509 e = self.expression( 4510 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4511 ) 4512 4513 if isinstance(e.this, exp.Column): 4514 e.this.replace(e.this.this) 4515 4516 transformed.append(e) 4517 4518 return transformed 4519 4520 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4521 return self._parse_column_def(self._parse_id_var()) 4522 4523 def _parse_user_defined_function( 4524 self, kind: t.Optional[TokenType] = None 4525 ) -> t.Optional[exp.Expression]: 4526 this = self._parse_id_var() 4527 4528 while self._match(TokenType.DOT): 4529 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4530 4531 if not self._match(TokenType.L_PAREN): 4532 return this 4533 4534 expressions = self._parse_csv(self._parse_function_parameter) 4535 self._match_r_paren() 4536 return self.expression( 4537 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4538 ) 4539 4540 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4541 literal = self._parse_primary() 4542 if literal: 4543 return self.expression(exp.Introducer, this=token.text, expression=literal) 4544 4545 return self.expression(exp.Identifier, this=token.text) 4546 4547 def _parse_session_parameter(self) -> exp.SessionParameter: 4548 kind = None 4549 this = self._parse_id_var() or self._parse_primary() 4550 4551 if this and self._match(TokenType.DOT): 4552 kind = this.name 4553 this = self._parse_var() or self._parse_primary() 4554 4555 return self.expression(exp.SessionParameter, this=this, kind=kind) 4556 4557 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4558 index = self._index 4559 4560 if self._match(TokenType.L_PAREN): 4561 expressions = t.cast( 4562 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4563 ) 4564 4565 if not self._match(TokenType.R_PAREN): 4566 self._retreat(index) 4567 else: 4568 expressions = [self._parse_id_var()] 4569 4570 if self._match_set(self.LAMBDAS): 4571 return self.LAMBDAS[self._prev.token_type](self, expressions) 4572 4573 self._retreat(index) 4574 4575 this: t.Optional[exp.Expression] 4576 4577 if self._match(TokenType.DISTINCT): 4578 this = self.expression( 4579 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4580 ) 4581 else: 4582 this = self._parse_select_or_expression(alias=alias) 4583 4584 return self._parse_limit( 4585 
self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4586 ) 4587 4588 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4589 index = self._index 4590 if not self._match(TokenType.L_PAREN): 4591 return this 4592 4593 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 4594 # expr can be of both types 4595 if self._match_set(self.SELECT_START_TOKENS): 4596 self._retreat(index) 4597 return this 4598 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4599 self._match_r_paren() 4600 return self.expression(exp.Schema, this=this, expressions=args) 4601 4602 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4603 return self._parse_column_def(self._parse_field(any_token=True)) 4604 4605 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4606 # column defs are not really columns, they're identifiers 4607 if isinstance(this, exp.Column): 4608 this = this.this 4609 4610 kind = self._parse_types(schema=True) 4611 4612 if self._match_text_seq("FOR", "ORDINALITY"): 4613 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4614 4615 constraints: t.List[exp.Expression] = [] 4616 4617 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4618 ("ALIAS", "MATERIALIZED") 4619 ): 4620 persisted = self._prev.text.upper() == "MATERIALIZED" 4621 constraints.append( 4622 self.expression( 4623 exp.ComputedColumnConstraint, 4624 this=self._parse_conjunction(), 4625 persisted=persisted or self._match_text_seq("PERSISTED"), 4626 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4627 ) 4628 ) 4629 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4630 self._match(TokenType.ALIAS) 4631 constraints.append( 4632 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4633 ) 4634 4635 while True: 4636 constraint = self._parse_column_constraint() 4637 if not constraint: 4638 break 4639 constraints.append(constraint) 4640 4641 if not kind and not constraints: 4642 return this 4643 4644 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4645 4646 def _parse_auto_increment( 4647 self, 4648 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4649 start = None 4650 increment = None 4651 4652 if self._match(TokenType.L_PAREN, advance=False): 4653 args = self._parse_wrapped_csv(self._parse_bitwise) 4654 start = seq_get(args, 0) 4655 increment = seq_get(args, 1) 4656 elif self._match_text_seq("START"): 4657 start = self._parse_bitwise() 4658 self._match_text_seq("INCREMENT") 4659 increment = self._parse_bitwise() 4660 4661 if start and increment: 4662 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4663 4664 return exp.AutoIncrementColumnConstraint() 4665 4666 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4667 if not self._match_text_seq("REFRESH"): 4668 self._retreat(self._index - 1) 4669 return None 4670 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4671 4672 def _parse_compress(self) -> exp.CompressColumnConstraint: 4673 if self._match(TokenType.L_PAREN, advance=False): 4674 return self.expression( 4675 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4676 ) 4677 4678 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4679 4680 def 
_parse_generated_as_identity( 4681 self, 4682 ) -> ( 4683 exp.GeneratedAsIdentityColumnConstraint 4684 | exp.ComputedColumnConstraint 4685 | exp.GeneratedAsRowColumnConstraint 4686 ): 4687 if self._match_text_seq("BY", "DEFAULT"): 4688 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4689 this = self.expression( 4690 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4691 ) 4692 else: 4693 self._match_text_seq("ALWAYS") 4694 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4695 4696 self._match(TokenType.ALIAS) 4697 4698 if self._match_text_seq("ROW"): 4699 start = self._match_text_seq("START") 4700 if not start: 4701 self._match(TokenType.END) 4702 hidden = self._match_text_seq("HIDDEN") 4703 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4704 4705 identity = self._match_text_seq("IDENTITY") 4706 4707 if self._match(TokenType.L_PAREN): 4708 if self._match(TokenType.START_WITH): 4709 this.set("start", self._parse_bitwise()) 4710 if self._match_text_seq("INCREMENT", "BY"): 4711 this.set("increment", self._parse_bitwise()) 4712 if self._match_text_seq("MINVALUE"): 4713 this.set("minvalue", self._parse_bitwise()) 4714 if self._match_text_seq("MAXVALUE"): 4715 this.set("maxvalue", self._parse_bitwise()) 4716 4717 if self._match_text_seq("CYCLE"): 4718 this.set("cycle", True) 4719 elif self._match_text_seq("NO", "CYCLE"): 4720 this.set("cycle", False) 4721 4722 if not identity: 4723 this.set("expression", self._parse_range()) 4724 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4725 args = self._parse_csv(self._parse_bitwise) 4726 this.set("start", seq_get(args, 0)) 4727 this.set("increment", seq_get(args, 1)) 4728 4729 self._match_r_paren() 4730 4731 return this 4732 4733 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4734 self._match_text_seq("LENGTH") 4735 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4736 4737 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4738 if self._match_text_seq("NULL"): 4739 return self.expression(exp.NotNullColumnConstraint) 4740 if self._match_text_seq("CASESPECIFIC"): 4741 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4742 if self._match_text_seq("FOR", "REPLICATION"): 4743 return self.expression(exp.NotForReplicationColumnConstraint) 4744 return None 4745 4746 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4747 if self._match(TokenType.CONSTRAINT): 4748 this = self._parse_id_var() 4749 else: 4750 this = None 4751 4752 if self._match_texts(self.CONSTRAINT_PARSERS): 4753 return self.expression( 4754 exp.ColumnConstraint, 4755 this=this, 4756 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4757 ) 4758 4759 return this 4760 4761 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4762 if not self._match(TokenType.CONSTRAINT): 4763 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4764 4765 return self.expression( 4766 exp.Constraint, 4767 this=self._parse_id_var(), 4768 expressions=self._parse_unnamed_constraints(), 4769 ) 4770 4771 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 4772 constraints = [] 4773 while True: 4774 constraint = self._parse_unnamed_constraint() or self._parse_function() 4775 if not constraint: 4776 break 4777 constraints.append(constraint) 4778 4779 return constraints 4780 4781 def _parse_unnamed_constraint( 4782 self, constraints: 
t.Optional[t.Collection[str]] = None 4783 ) -> t.Optional[exp.Expression]: 4784 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4785 constraints or self.CONSTRAINT_PARSERS 4786 ): 4787 return None 4788 4789 constraint = self._prev.text.upper() 4790 if constraint not in self.CONSTRAINT_PARSERS: 4791 self.raise_error(f"No parser found for schema constraint {constraint}.") 4792 4793 return self.CONSTRAINT_PARSERS[constraint](self) 4794 4795 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4796 self._match_text_seq("KEY") 4797 return self.expression( 4798 exp.UniqueColumnConstraint, 4799 this=self._parse_schema(self._parse_id_var(any_token=False)), 4800 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4801 on_conflict=self._parse_on_conflict(), 4802 ) 4803 4804 def _parse_key_constraint_options(self) -> t.List[str]: 4805 options = [] 4806 while True: 4807 if not self._curr: 4808 break 4809 4810 if self._match(TokenType.ON): 4811 action = None 4812 on = self._advance_any() and self._prev.text 4813 4814 if self._match_text_seq("NO", "ACTION"): 4815 action = "NO ACTION" 4816 elif self._match_text_seq("CASCADE"): 4817 action = "CASCADE" 4818 elif self._match_text_seq("RESTRICT"): 4819 action = "RESTRICT" 4820 elif self._match_pair(TokenType.SET, TokenType.NULL): 4821 action = "SET NULL" 4822 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4823 action = "SET DEFAULT" 4824 else: 4825 self.raise_error("Invalid key constraint") 4826 4827 options.append(f"ON {on} {action}") 4828 elif self._match_text_seq("NOT", "ENFORCED"): 4829 options.append("NOT ENFORCED") 4830 elif self._match_text_seq("DEFERRABLE"): 4831 options.append("DEFERRABLE") 4832 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4833 options.append("INITIALLY DEFERRED") 4834 elif self._match_text_seq("NORELY"): 4835 options.append("NORELY") 4836 elif self._match_text_seq("MATCH", "FULL"): 4837 options.append("MATCH FULL") 4838 else: 4839 break 4840 4841 return options 4842 4843 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4844 if match and not self._match(TokenType.REFERENCES): 4845 return None 4846 4847 expressions = None 4848 this = self._parse_table(schema=True) 4849 options = self._parse_key_constraint_options() 4850 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4851 4852 def _parse_foreign_key(self) -> exp.ForeignKey: 4853 expressions = self._parse_wrapped_id_vars() 4854 reference = self._parse_references() 4855 options = {} 4856 4857 while self._match(TokenType.ON): 4858 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4859 self.raise_error("Expected DELETE or UPDATE") 4860 4861 kind = self._prev.text.lower() 4862 4863 if self._match_text_seq("NO", "ACTION"): 4864 action = "NO ACTION" 4865 elif self._match(TokenType.SET): 4866 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4867 action = "SET " + self._prev.text.upper() 4868 else: 4869 self._advance() 4870 action = self._prev.text.upper() 4871 4872 options[kind] = action 4873 4874 return self.expression( 4875 exp.ForeignKey, 4876 expressions=expressions, 4877 reference=reference, 4878 **options, # type: ignore 4879 ) 4880 4881 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4882 return self._parse_field() 4883 4884 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4885 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4886 self._retreat(self._index - 
1) 4887 return None 4888 4889 id_vars = self._parse_wrapped_id_vars() 4890 return self.expression( 4891 exp.PeriodForSystemTimeConstraint, 4892 this=seq_get(id_vars, 0), 4893 expression=seq_get(id_vars, 1), 4894 ) 4895 4896 def _parse_primary_key( 4897 self, wrapped_optional: bool = False, in_props: bool = False 4898 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4899 desc = ( 4900 self._match_set((TokenType.ASC, TokenType.DESC)) 4901 and self._prev.token_type == TokenType.DESC 4902 ) 4903 4904 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4905 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4906 4907 expressions = self._parse_wrapped_csv( 4908 self._parse_primary_key_part, optional=wrapped_optional 4909 ) 4910 options = self._parse_key_constraint_options() 4911 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4912 4913 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4914 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4915 4916 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4917 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4918 return this 4919 4920 bracket_kind = self._prev.token_type 4921 expressions = self._parse_csv( 4922 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4923 ) 4924 4925 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 4926 self.raise_error("Expected ]") 4927 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 4928 self.raise_error("Expected }") 4929 4930 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4931 if bracket_kind == TokenType.L_BRACE: 4932 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 4933 elif not this or this.name.upper() == "ARRAY": 4934 this = self.expression(exp.Array, expressions=expressions) 4935 else: 4936 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4937 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4938 4939 self._add_comments(this) 4940 return self._parse_bracket(this) 4941 4942 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4943 if self._match(TokenType.COLON): 4944 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4945 return this 4946 4947 def _parse_case(self) -> t.Optional[exp.Expression]: 4948 ifs = [] 4949 default = None 4950 4951 comments = self._prev_comments 4952 expression = self._parse_conjunction() 4953 4954 while self._match(TokenType.WHEN): 4955 this = self._parse_conjunction() 4956 self._match(TokenType.THEN) 4957 then = self._parse_conjunction() 4958 ifs.append(self.expression(exp.If, this=this, true=then)) 4959 4960 if self._match(TokenType.ELSE): 4961 default = self._parse_conjunction() 4962 4963 if not self._match(TokenType.END): 4964 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4965 default = exp.column("interval") 4966 else: 4967 self.raise_error("Expected END after CASE", self._prev) 4968 4969 return self.expression( 4970 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 4971 ) 4972 4973 def _parse_if(self) -> t.Optional[exp.Expression]: 4974 if self._match(TokenType.L_PAREN): 4975 args = self._parse_csv(self._parse_conjunction) 4976 this = 
self.validate_expression(exp.If.from_arg_list(args), args) 4977 self._match_r_paren() 4978 else: 4979 index = self._index - 1 4980 4981 if self.NO_PAREN_IF_COMMANDS and index == 0: 4982 return self._parse_as_command(self._prev) 4983 4984 condition = self._parse_conjunction() 4985 4986 if not condition: 4987 self._retreat(index) 4988 return None 4989 4990 self._match(TokenType.THEN) 4991 true = self._parse_conjunction() 4992 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4993 self._match(TokenType.END) 4994 this = self.expression(exp.If, this=condition, true=true, false=false) 4995 4996 return this 4997 4998 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4999 if not self._match_text_seq("VALUE", "FOR"): 5000 self._retreat(self._index - 1) 5001 return None 5002 5003 return self.expression( 5004 exp.NextValueFor, 5005 this=self._parse_column(), 5006 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5007 ) 5008 5009 def _parse_extract(self) -> exp.Extract: 5010 this = self._parse_function() or self._parse_var() or self._parse_type() 5011 5012 if self._match(TokenType.FROM): 5013 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5014 5015 if not self._match(TokenType.COMMA): 5016 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5017 5018 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5019 5020 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5021 this = self._parse_conjunction() 5022 5023 if not self._match(TokenType.ALIAS): 5024 if self._match(TokenType.COMMA): 5025 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5026 5027 self.raise_error("Expected AS after CAST") 5028 5029 fmt = None 5030 to = self._parse_types() 5031 5032 if self._match(TokenType.FORMAT): 5033 fmt_string = self._parse_string() 5034 fmt = self._parse_at_time_zone(fmt_string) 5035 5036 if not to: 5037 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5038 if to.this in exp.DataType.TEMPORAL_TYPES: 5039 this = self.expression( 5040 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5041 this=this, 5042 format=exp.Literal.string( 5043 format_time( 5044 fmt_string.this if fmt_string else "", 5045 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5046 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5047 ) 5048 ), 5049 ) 5050 5051 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5052 this.set("zone", fmt.args["zone"]) 5053 return this 5054 elif not to: 5055 self.raise_error("Expected TYPE after CAST") 5056 elif isinstance(to, exp.Identifier): 5057 to = exp.DataType.build(to.name, udt=True) 5058 elif to.this == exp.DataType.Type.CHAR: 5059 if self._match(TokenType.CHARACTER_SET): 5060 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5061 5062 return self.expression( 5063 exp.Cast if strict else exp.TryCast, 5064 this=this, 5065 to=to, 5066 format=fmt, 5067 safe=safe, 5068 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5069 ) 5070 5071 def _parse_string_agg(self) -> exp.Expression: 5072 if self._match(TokenType.DISTINCT): 5073 args: t.List[t.Optional[exp.Expression]] = [ 5074 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 5075 ] 5076 if self._match(TokenType.COMMA): 5077 args.extend(self._parse_csv(self._parse_conjunction)) 5078 else: 5079 args = 
self._parse_csv(self._parse_conjunction) # type: ignore 5080 5081 index = self._index 5082 if not self._match(TokenType.R_PAREN) and args: 5083 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5084 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5085 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5086 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5087 5088 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5089 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5090 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5091 if not self._match_text_seq("WITHIN", "GROUP"): 5092 self._retreat(index) 5093 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5094 5095 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5096 order = self._parse_order(this=seq_get(args, 0)) 5097 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5098 5099 def _parse_convert( 5100 self, strict: bool, safe: t.Optional[bool] = None 5101 ) -> t.Optional[exp.Expression]: 5102 this = self._parse_bitwise() 5103 5104 if self._match(TokenType.USING): 5105 to: t.Optional[exp.Expression] = self.expression( 5106 exp.CharacterSet, this=self._parse_var() 5107 ) 5108 elif self._match(TokenType.COMMA): 5109 to = self._parse_types() 5110 else: 5111 to = None 5112 5113 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5114 5115 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5116 """ 5117 There are generally two variants of the DECODE function: 5118 5119 - DECODE(bin, charset) 5120 - DECODE(expression, search, result [, search, result] ... [, default]) 5121 5122 The second variant will always be parsed into a CASE expression. Note that NULL 5123 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5124 instead of relying on pattern matching. 
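For example, DECODE(a, 1, 'one', 'other') is parsed into the equivalent of
CASE WHEN a = 1 THEN 'one' ELSE 'other' END, while a non-literal search value s
produces the condition a = s OR (a IS NULL AND s IS NULL).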
5125 """ 5126 args = self._parse_csv(self._parse_conjunction) 5127 5128 if len(args) < 3: 5129 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5130 5131 expression, *expressions = args 5132 if not expression: 5133 return None 5134 5135 ifs = [] 5136 for search, result in zip(expressions[::2], expressions[1::2]): 5137 if not search or not result: 5138 return None 5139 5140 if isinstance(search, exp.Literal): 5141 ifs.append( 5142 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5143 ) 5144 elif isinstance(search, exp.Null): 5145 ifs.append( 5146 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5147 ) 5148 else: 5149 cond = exp.or_( 5150 exp.EQ(this=expression.copy(), expression=search), 5151 exp.and_( 5152 exp.Is(this=expression.copy(), expression=exp.Null()), 5153 exp.Is(this=search.copy(), expression=exp.Null()), 5154 copy=False, 5155 ), 5156 copy=False, 5157 ) 5158 ifs.append(exp.If(this=cond, true=result)) 5159 5160 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5161 5162 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5163 self._match_text_seq("KEY") 5164 key = self._parse_column() 5165 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5166 self._match_text_seq("VALUE") 5167 value = self._parse_bitwise() 5168 5169 if not key and not value: 5170 return None 5171 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5172 5173 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5174 if not this or not self._match_text_seq("FORMAT", "JSON"): 5175 return this 5176 5177 return self.expression(exp.FormatJson, this=this) 5178 5179 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5180 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5181 for value in values: 5182 if self._match_text_seq(value, "ON", on): 5183 return f"{value} ON {on}" 5184 5185 return None 5186 5187 @t.overload 5188 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5189 5190 @t.overload 5191 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5192 5193 def _parse_json_object(self, agg=False): 5194 star = self._parse_star() 5195 expressions = ( 5196 [star] 5197 if star 5198 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5199 ) 5200 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5201 5202 unique_keys = None 5203 if self._match_text_seq("WITH", "UNIQUE"): 5204 unique_keys = True 5205 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5206 unique_keys = False 5207 5208 self._match_text_seq("KEYS") 5209 5210 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5211 self._parse_type() 5212 ) 5213 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5214 5215 return self.expression( 5216 exp.JSONObjectAgg if agg else exp.JSONObject, 5217 expressions=expressions, 5218 null_handling=null_handling, 5219 unique_keys=unique_keys, 5220 return_type=return_type, 5221 encoding=encoding, 5222 ) 5223 5224 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5225 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5226 if not self._match_text_seq("NESTED"): 5227 this = self._parse_id_var() 5228 kind = self._parse_types(allow_identifiers=False) 5229 nested = None 5230 else: 5231 this = None 5232 kind = None 5233 nested = True 5234 5235 path = self._match_text_seq("PATH") and self._parse_string() 5236 nested_schema = nested and self._parse_json_schema() 5237 5238 return self.expression( 5239 exp.JSONColumnDef, 5240 this=this, 5241 kind=kind, 5242 path=path, 5243 nested_schema=nested_schema, 5244 ) 5245 5246 def _parse_json_schema(self) -> exp.JSONSchema: 5247 self._match_text_seq("COLUMNS") 5248 return self.expression( 5249 exp.JSONSchema, 5250 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5251 ) 5252 5253 def _parse_json_table(self) -> exp.JSONTable: 5254 this = self._parse_format_json(self._parse_bitwise()) 5255 path = self._match(TokenType.COMMA) and self._parse_string() 5256 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5257 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5258 schema = self._parse_json_schema() 5259 5260 return exp.JSONTable( 5261 this=this, 5262 schema=schema, 5263 path=path, 5264 error_handling=error_handling, 5265 empty_handling=empty_handling, 5266 ) 5267 5268 def _parse_match_against(self) -> exp.MatchAgainst: 5269 expressions = self._parse_csv(self._parse_column) 5270 5271 self._match_text_seq(")", "AGAINST", "(") 5272 5273 this = self._parse_string() 5274 5275 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5276 modifier = "IN NATURAL LANGUAGE MODE" 5277 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5278 modifier = f"{modifier} WITH QUERY EXPANSION" 5279 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5280 modifier = "IN BOOLEAN MODE" 5281 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5282 modifier = "WITH QUERY EXPANSION" 5283 else: 5284 modifier = None 5285 5286 return self.expression( 5287 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5288 ) 5289 5290 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5291 def _parse_open_json(self) -> exp.OpenJSON: 5292 this = self._parse_bitwise() 5293 path = self._match(TokenType.COMMA) and self._parse_string() 5294 5295 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5296 this = self._parse_field(any_token=True) 5297 kind = self._parse_types() 5298 path = 
self._parse_string() 5299 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5300 5301 return self.expression( 5302 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5303 ) 5304 5305 expressions = None 5306 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5307 self._match_l_paren() 5308 expressions = self._parse_csv(_parse_open_json_column_def) 5309 5310 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5311 5312 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5313 args = self._parse_csv(self._parse_bitwise) 5314 5315 if self._match(TokenType.IN): 5316 return self.expression( 5317 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5318 ) 5319 5320 if haystack_first: 5321 haystack = seq_get(args, 0) 5322 needle = seq_get(args, 1) 5323 else: 5324 needle = seq_get(args, 0) 5325 haystack = seq_get(args, 1) 5326 5327 return self.expression( 5328 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5329 ) 5330 5331 def _parse_predict(self) -> exp.Predict: 5332 self._match_text_seq("MODEL") 5333 this = self._parse_table() 5334 5335 self._match(TokenType.COMMA) 5336 self._match_text_seq("TABLE") 5337 5338 return self.expression( 5339 exp.Predict, 5340 this=this, 5341 expression=self._parse_table(), 5342 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5343 ) 5344 5345 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5346 args = self._parse_csv(self._parse_table) 5347 return exp.JoinHint(this=func_name.upper(), expressions=args) 5348 5349 def _parse_substring(self) -> exp.Substring: 5350 # Postgres supports the form: substring(string [from int] [for int]) 5351 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5352 5353 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5354 5355 if self._match(TokenType.FROM): 5356 args.append(self._parse_bitwise()) 5357 if self._match(TokenType.FOR): 5358 if len(args) == 1: 5359 args.append(exp.Literal.number(1)) 5360 args.append(self._parse_bitwise()) 5361 5362 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5363 5364 def _parse_trim(self) -> exp.Trim: 5365 # https://www.w3resource.com/sql/character-functions/trim.php 5366 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5367 5368 position = None 5369 collation = None 5370 expression = None 5371 5372 if self._match_texts(self.TRIM_TYPES): 5373 position = self._prev.text.upper() 5374 5375 this = self._parse_bitwise() 5376 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5377 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5378 expression = self._parse_bitwise() 5379 5380 if invert_order: 5381 this, expression = expression, this 5382 5383 if self._match(TokenType.COLLATE): 5384 collation = self._parse_bitwise() 5385 5386 return self.expression( 5387 exp.Trim, this=this, position=position, expression=expression, collation=collation 5388 ) 5389 5390 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5391 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5392 5393 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5394 return self._parse_window(self._parse_id_var(), alias=True) 5395 5396 def _parse_respect_or_ignore_nulls( 5397 self, this: t.Optional[exp.Expression] 5398 ) -> t.Optional[exp.Expression]: 5399 if self._match_text_seq("IGNORE", "NULLS"): 
5400 return self.expression(exp.IgnoreNulls, this=this) 5401 if self._match_text_seq("RESPECT", "NULLS"): 5402 return self.expression(exp.RespectNulls, this=this) 5403 return this 5404 5405 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5406 if self._match(TokenType.HAVING): 5407 self._match_texts(("MAX", "MIN")) 5408 max = self._prev.text.upper() != "MIN" 5409 return self.expression( 5410 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5411 ) 5412 5413 return this 5414 5415 def _parse_window( 5416 self, this: t.Optional[exp.Expression], alias: bool = False 5417 ) -> t.Optional[exp.Expression]: 5418 func = this 5419 comments = func.comments if isinstance(func, exp.Expression) else None 5420 5421 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5422 self._match(TokenType.WHERE) 5423 this = self.expression( 5424 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5425 ) 5426 self._match_r_paren() 5427 5428 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5429 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5430 if self._match_text_seq("WITHIN", "GROUP"): 5431 order = self._parse_wrapped(self._parse_order) 5432 this = self.expression(exp.WithinGroup, this=this, expression=order) 5433 5434 # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5435 # clause; some dialects choose to implement it and some do not. 5436 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5437 5438 # There is some code above in _parse_lambda that handles 5439 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5440 5441 # The code below handles 5442 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5443 5444 # Oracle allows both formats 5445 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5446 # and Snowflake chose to do the same for familiarity 5447 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5448 if isinstance(this, exp.AggFunc): 5449 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5450 5451 if ignore_respect and ignore_respect is not this: 5452 ignore_respect.replace(ignore_respect.this) 5453 this = self.expression(ignore_respect.__class__, this=this) 5454 5455 this = self._parse_respect_or_ignore_nulls(this) 5456 5457 # BigQuery allows selecting from a named window, e.g. WINDOW x AS (PARTITION BY ...)
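# For example, for SELECT ... WINDOW w AS (PARTITION BY a ORDER BY b), _parse_named_window
# passes the identifier `w` in as `this` with alias=True, so the branch below consumes the
# AS token (TokenType.ALIAS) instead of requiring an OVER before the parenthesized spec.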
5458 if alias: 5459 over = None 5460 self._match(TokenType.ALIAS) 5461 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5462 return this 5463 else: 5464 over = self._prev.text.upper() 5465 5466 if comments and isinstance(func, exp.Expression): 5467 func.pop_comments() 5468 5469 if not self._match(TokenType.L_PAREN): 5470 return self.expression( 5471 exp.Window, 5472 comments=comments, 5473 this=this, 5474 alias=self._parse_id_var(False), 5475 over=over, 5476 ) 5477 5478 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5479 5480 first = self._match(TokenType.FIRST) 5481 if self._match_text_seq("LAST"): 5482 first = False 5483 5484 partition, order = self._parse_partition_and_order() 5485 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5486 5487 if kind: 5488 self._match(TokenType.BETWEEN) 5489 start = self._parse_window_spec() 5490 self._match(TokenType.AND) 5491 end = self._parse_window_spec() 5492 5493 spec = self.expression( 5494 exp.WindowSpec, 5495 kind=kind, 5496 start=start["value"], 5497 start_side=start["side"], 5498 end=end["value"], 5499 end_side=end["side"], 5500 ) 5501 else: 5502 spec = None 5503 5504 self._match_r_paren() 5505 5506 window = self.expression( 5507 exp.Window, 5508 comments=comments, 5509 this=this, 5510 partition_by=partition, 5511 order=order, 5512 spec=spec, 5513 alias=window_alias, 5514 over=over, 5515 first=first, 5516 ) 5517 5518 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5519 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5520 return self._parse_window(window, alias=alias) 5521 5522 return window 5523 5524 def _parse_partition_and_order( 5525 self, 5526 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5527 return self._parse_partition_by(), self._parse_order() 5528 5529 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5530 self._match(TokenType.BETWEEN) 5531 5532 return { 5533 "value": ( 5534 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5535 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5536 or self._parse_bitwise() 5537 ), 5538 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5539 } 5540 5541 def _parse_alias( 5542 self, this: t.Optional[exp.Expression], explicit: bool = False 5543 ) -> t.Optional[exp.Expression]: 5544 any_token = self._match(TokenType.ALIAS) 5545 comments = self._prev_comments or [] 5546 5547 if explicit and not any_token: 5548 return this 5549 5550 if self._match(TokenType.L_PAREN): 5551 aliases = self.expression( 5552 exp.Aliases, 5553 comments=comments, 5554 this=this, 5555 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5556 ) 5557 self._match_r_paren(aliases) 5558 return aliases 5559 5560 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5561 self.STRING_ALIASES and self._parse_string_as_identifier() 5562 ) 5563 5564 if alias: 5565 comments.extend(alias.pop_comments()) 5566 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5567 column = this.this 5568 5569 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5570 if not this.comments and column and column.comments: 5571 this.comments = column.pop_comments() 5572 5573 return this 5574 5575 def _parse_id_var( 5576 self, 5577 any_token: bool = True, 5578 tokens: t.Optional[t.Collection[TokenType]] = None, 5579 ) -> t.Optional[exp.Expression]: 5580 expression = self._parse_identifier() 5581 if 
not expression and ( 5582 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5583 ): 5584 quoted = self._prev.token_type == TokenType.STRING 5585 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5586 5587 return expression 5588 5589 def _parse_string(self) -> t.Optional[exp.Expression]: 5590 if self._match_set(self.STRING_PARSERS): 5591 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5592 return self._parse_placeholder() 5593 5594 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5595 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5596 5597 def _parse_number(self) -> t.Optional[exp.Expression]: 5598 if self._match_set(self.NUMERIC_PARSERS): 5599 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5600 return self._parse_placeholder() 5601 5602 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5603 if self._match(TokenType.IDENTIFIER): 5604 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5605 return self._parse_placeholder() 5606 5607 def _parse_var( 5608 self, 5609 any_token: bool = False, 5610 tokens: t.Optional[t.Collection[TokenType]] = None, 5611 upper: bool = False, 5612 ) -> t.Optional[exp.Expression]: 5613 if ( 5614 (any_token and self._advance_any()) 5615 or self._match(TokenType.VAR) 5616 or (self._match_set(tokens) if tokens else False) 5617 ): 5618 return self.expression( 5619 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5620 ) 5621 return self._parse_placeholder() 5622 5623 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5624 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5625 self._advance() 5626 return self._prev 5627 return None 5628 5629 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5630 return self._parse_var() or self._parse_string() 5631 5632 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5633 return self._parse_primary() or self._parse_var(any_token=True) 5634 5635 def _parse_null(self) -> t.Optional[exp.Expression]: 5636 if self._match_set(self.NULL_TOKENS): 5637 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5638 return self._parse_placeholder() 5639 5640 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5641 if self._match(TokenType.TRUE): 5642 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5643 if self._match(TokenType.FALSE): 5644 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5645 return self._parse_placeholder() 5646 5647 def _parse_star(self) -> t.Optional[exp.Expression]: 5648 if self._match(TokenType.STAR): 5649 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5650 return self._parse_placeholder() 5651 5652 def _parse_parameter(self) -> exp.Parameter: 5653 this = self._parse_identifier() or self._parse_primary_or_var() 5654 return self.expression(exp.Parameter, this=this) 5655 5656 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5657 if self._match_set(self.PLACEHOLDER_PARSERS): 5658 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5659 if placeholder: 5660 return placeholder 5661 self._advance(-1) 5662 return None 5663 5664 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5665 if not self._match(TokenType.EXCEPT): 5666 return None 5667 if self._match(TokenType.L_PAREN, advance=False): 5668 return 
self._parse_wrapped_csv(self._parse_column) 5669 5670 except_column = self._parse_column() 5671 return [except_column] if except_column else None 5672 5673 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5674 if not self._match(TokenType.REPLACE): 5675 return None 5676 if self._match(TokenType.L_PAREN, advance=False): 5677 return self._parse_wrapped_csv(self._parse_expression) 5678 5679 replace_expression = self._parse_expression() 5680 return [replace_expression] if replace_expression else None 5681 5682 def _parse_csv( 5683 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5684 ) -> t.List[exp.Expression]: 5685 parse_result = parse_method() 5686 items = [parse_result] if parse_result is not None else [] 5687 5688 while self._match(sep): 5689 self._add_comments(parse_result) 5690 parse_result = parse_method() 5691 if parse_result is not None: 5692 items.append(parse_result) 5693 5694 return items 5695 5696 def _parse_tokens( 5697 self, parse_method: t.Callable, expressions: t.Dict 5698 ) -> t.Optional[exp.Expression]: 5699 this = parse_method() 5700 5701 while self._match_set(expressions): 5702 this = self.expression( 5703 expressions[self._prev.token_type], 5704 this=this, 5705 comments=self._prev_comments, 5706 expression=parse_method(), 5707 ) 5708 5709 return this 5710 5711 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5712 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5713 5714 def _parse_wrapped_csv( 5715 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5716 ) -> t.List[exp.Expression]: 5717 return self._parse_wrapped( 5718 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5719 ) 5720 5721 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5722 wrapped = self._match(TokenType.L_PAREN) 5723 if not wrapped and not optional: 5724 self.raise_error("Expecting (") 5725 parse_result = parse_method() 5726 if wrapped: 5727 self._match_r_paren() 5728 return parse_result 5729 5730 def _parse_expressions(self) -> t.List[exp.Expression]: 5731 return self._parse_csv(self._parse_expression) 5732 5733 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5734 return self._parse_select() or self._parse_set_operations( 5735 self._parse_expression() if alias else self._parse_conjunction() 5736 ) 5737 5738 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5739 return self._parse_query_modifiers( 5740 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5741 ) 5742 5743 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5744 this = None 5745 if self._match_texts(self.TRANSACTION_KIND): 5746 this = self._prev.text 5747 5748 self._match_texts(("TRANSACTION", "WORK")) 5749 5750 modes = [] 5751 while True: 5752 mode = [] 5753 while self._match(TokenType.VAR): 5754 mode.append(self._prev.text) 5755 5756 if mode: 5757 modes.append(" ".join(mode)) 5758 if not self._match(TokenType.COMMA): 5759 break 5760 5761 return self.expression(exp.Transaction, this=this, modes=modes) 5762 5763 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5764 chain = None 5765 savepoint = None 5766 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5767 5768 self._match_texts(("TRANSACTION", "WORK")) 5769 5770 if self._match_text_seq("TO"): 5771 self._match_text_seq("SAVEPOINT") 5772 savepoint = self._parse_id_var() 5773 5774 if 
self._match(TokenType.AND): 5775 chain = not self._match_text_seq("NO") 5776 self._match_text_seq("CHAIN") 5777 5778 if is_rollback: 5779 return self.expression(exp.Rollback, savepoint=savepoint) 5780 5781 return self.expression(exp.Commit, chain=chain) 5782 5783 def _parse_refresh(self) -> exp.Refresh: 5784 self._match(TokenType.TABLE) 5785 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5786 5787 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5788 if not self._match_text_seq("ADD"): 5789 return None 5790 5791 self._match(TokenType.COLUMN) 5792 exists_column = self._parse_exists(not_=True) 5793 expression = self._parse_field_def() 5794 5795 if expression: 5796 expression.set("exists", exists_column) 5797 5798 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5799 if self._match_texts(("FIRST", "AFTER")): 5800 position = self._prev.text 5801 column_position = self.expression( 5802 exp.ColumnPosition, this=self._parse_column(), position=position 5803 ) 5804 expression.set("position", column_position) 5805 5806 return expression 5807 5808 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5809 drop = self._match(TokenType.DROP) and self._parse_drop() 5810 if drop and not isinstance(drop, exp.Command): 5811 drop.set("kind", drop.args.get("kind", "COLUMN")) 5812 return drop 5813 5814 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5815 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5816 return self.expression( 5817 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5818 ) 5819 5820 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5821 index = self._index - 1 5822 5823 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5824 return self._parse_csv( 5825 lambda: self.expression( 5826 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5827 ) 5828 ) 5829 5830 self._retreat(index) 5831 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5832 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5833 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5834 5835 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 5836 if self._match_texts(self.ALTER_ALTER_PARSERS): 5837 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 5838 5839 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 5840 # keyword after ALTER we default to parsing this statement 5841 self._match(TokenType.COLUMN) 5842 column = self._parse_field(any_token=True) 5843 5844 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5845 return self.expression(exp.AlterColumn, this=column, drop=True) 5846 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5847 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5848 if self._match(TokenType.COMMENT): 5849 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5850 5851 self._match_text_seq("SET", "DATA") 5852 self._match_text_seq("TYPE") 5853 return self.expression( 5854 exp.AlterColumn, 5855 this=column, 5856 dtype=self._parse_types(), 5857 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5858 using=self._match(TokenType.USING) and self._parse_conjunction(), 5859 ) 5860 5861 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 5862 if 
self._match_texts(("ALL", "EVEN", "AUTO")): 5863 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 5864 5865 self._match_text_seq("KEY", "DISTKEY") 5866 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 5867 5868 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 5869 if compound: 5870 self._match_text_seq("SORTKEY") 5871 5872 if self._match(TokenType.L_PAREN, advance=False): 5873 return self.expression( 5874 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 5875 ) 5876 5877 self._match_texts(("AUTO", "NONE")) 5878 return self.expression( 5879 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 5880 ) 5881 5882 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5883 index = self._index - 1 5884 5885 partition_exists = self._parse_exists() 5886 if self._match(TokenType.PARTITION, advance=False): 5887 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5888 5889 self._retreat(index) 5890 return self._parse_csv(self._parse_drop_column) 5891 5892 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5893 if self._match(TokenType.COLUMN): 5894 exists = self._parse_exists() 5895 old_column = self._parse_column() 5896 to = self._match_text_seq("TO") 5897 new_column = self._parse_column() 5898 5899 if old_column is None or to is None or new_column is None: 5900 return None 5901 5902 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5903 5904 self._match_text_seq("TO") 5905 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5906 5907 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5908 start = self._prev 5909 5910 if not self._match(TokenType.TABLE): 5911 return self._parse_as_command(start) 5912 5913 exists = self._parse_exists() 5914 only = self._match_text_seq("ONLY") 5915 this = self._parse_table(schema=True) 5916 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 5917 5918 if self._next: 5919 self._advance() 5920 5921 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5922 if parser: 5923 actions = ensure_list(parser(self)) 5924 options = self._parse_csv(self._parse_property) 5925 5926 if not self._curr and actions: 5927 return self.expression( 5928 exp.AlterTable, 5929 this=this, 5930 exists=exists, 5931 actions=actions, 5932 only=only, 5933 options=options, 5934 cluster=cluster, 5935 ) 5936 5937 return self._parse_as_command(start) 5938 5939 def _parse_merge(self) -> exp.Merge: 5940 self._match(TokenType.INTO) 5941 target = self._parse_table() 5942 5943 if target and self._match(TokenType.ALIAS, advance=False): 5944 target.set("alias", self._parse_table_alias()) 5945 5946 self._match(TokenType.USING) 5947 using = self._parse_table() 5948 5949 self._match(TokenType.ON) 5950 on = self._parse_conjunction() 5951 5952 return self.expression( 5953 exp.Merge, 5954 this=target, 5955 using=using, 5956 on=on, 5957 expressions=self._parse_when_matched(), 5958 ) 5959 5960 def _parse_when_matched(self) -> t.List[exp.When]: 5961 whens = [] 5962 5963 while self._match(TokenType.WHEN): 5964 matched = not self._match(TokenType.NOT) 5965 self._match_text_seq("MATCHED") 5966 source = ( 5967 False 5968 if self._match_text_seq("BY", "TARGET") 5969 else self._match_text_seq("BY", "SOURCE") 5970 ) 5971 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 
5972 5973 self._match(TokenType.THEN) 5974 5975 if self._match(TokenType.INSERT): 5976 _this = self._parse_star() 5977 if _this: 5978 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5979 else: 5980 then = self.expression( 5981 exp.Insert, 5982 this=self._parse_value(), 5983 expression=self._match_text_seq("VALUES") and self._parse_value(), 5984 ) 5985 elif self._match(TokenType.UPDATE): 5986 expressions = self._parse_star() 5987 if expressions: 5988 then = self.expression(exp.Update, expressions=expressions) 5989 else: 5990 then = self.expression( 5991 exp.Update, 5992 expressions=self._match(TokenType.SET) 5993 and self._parse_csv(self._parse_equality), 5994 ) 5995 elif self._match(TokenType.DELETE): 5996 then = self.expression(exp.Var, this=self._prev.text) 5997 else: 5998 then = None 5999 6000 whens.append( 6001 self.expression( 6002 exp.When, 6003 matched=matched, 6004 source=source, 6005 condition=condition, 6006 then=then, 6007 ) 6008 ) 6009 return whens 6010 6011 def _parse_show(self) -> t.Optional[exp.Expression]: 6012 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6013 if parser: 6014 return parser(self) 6015 return self._parse_as_command(self._prev) 6016 6017 def _parse_set_item_assignment( 6018 self, kind: t.Optional[str] = None 6019 ) -> t.Optional[exp.Expression]: 6020 index = self._index 6021 6022 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6023 return self._parse_set_transaction(global_=kind == "GLOBAL") 6024 6025 left = self._parse_primary() or self._parse_column() 6026 assignment_delimiter = self._match_texts(("=", "TO")) 6027 6028 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6029 self._retreat(index) 6030 return None 6031 6032 right = self._parse_statement() or self._parse_id_var() 6033 this = self.expression(exp.EQ, this=left, expression=right) 6034 6035 return self.expression(exp.SetItem, this=this, kind=kind) 6036 6037 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6038 self._match_text_seq("TRANSACTION") 6039 characteristics = self._parse_csv( 6040 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6041 ) 6042 return self.expression( 6043 exp.SetItem, 6044 expressions=characteristics, 6045 kind="TRANSACTION", 6046 **{"global": global_}, # type: ignore 6047 ) 6048 6049 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6050 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6051 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6052 6053 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6054 index = self._index 6055 set_ = self.expression( 6056 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6057 ) 6058 6059 if self._curr: 6060 self._retreat(index) 6061 return self._parse_as_command(self._prev) 6062 6063 return set_ 6064 6065 def _parse_var_from_options( 6066 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6067 ) -> t.Optional[exp.Var]: 6068 start = self._curr 6069 if not start: 6070 return None 6071 6072 option = start.text.upper() 6073 continuations = options.get(option) 6074 6075 index = self._index 6076 self._advance() 6077 for keywords in continuations or []: 6078 if isinstance(keywords, str): 6079 keywords = (keywords,) 6080 6081 if self._match_text_seq(*keywords): 6082 option = f"{option} {' '.join(keywords)}" 6083 break 6084 else: 6085 if continuations or continuations is None: 6086 
if raise_unmatched: 6087 self.raise_error(f"Unknown option {option}") 6088 6089 self._retreat(index) 6090 return None 6091 6092 return exp.var(option) 6093 6094 def _parse_as_command(self, start: Token) -> exp.Command: 6095 while self._curr: 6096 self._advance() 6097 text = self._find_sql(start, self._prev) 6098 size = len(start.text) 6099 self._warn_unsupported() 6100 return exp.Command(this=text[:size], expression=text[size:]) 6101 6102 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6103 settings = [] 6104 6105 self._match_l_paren() 6106 kind = self._parse_id_var() 6107 6108 if self._match(TokenType.L_PAREN): 6109 while True: 6110 key = self._parse_id_var() 6111 value = self._parse_primary() 6112 6113 if not key and value is None: 6114 break 6115 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6116 self._match(TokenType.R_PAREN) 6117 6118 self._match_r_paren() 6119 6120 return self.expression( 6121 exp.DictProperty, 6122 this=this, 6123 kind=kind.this if kind else None, 6124 settings=settings, 6125 ) 6126 6127 def _parse_dict_range(self, this: str) -> exp.DictRange: 6128 self._match_l_paren() 6129 has_min = self._match_text_seq("MIN") 6130 if has_min: 6131 min = self._parse_var() or self._parse_primary() 6132 self._match_text_seq("MAX") 6133 max = self._parse_var() or self._parse_primary() 6134 else: 6135 max = self._parse_var() or self._parse_primary() 6136 min = exp.Literal.number(0) 6137 self._match_r_paren() 6138 return self.expression(exp.DictRange, this=this, min=min, max=max) 6139 6140 def _parse_comprehension( 6141 self, this: t.Optional[exp.Expression] 6142 ) -> t.Optional[exp.Comprehension]: 6143 index = self._index 6144 expression = self._parse_column() 6145 if not self._match(TokenType.IN): 6146 self._retreat(index - 1) 6147 return None 6148 iterator = self._parse_column() 6149 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6150 return self.expression( 6151 exp.Comprehension, 6152 this=this, 6153 expression=expression, 6154 iterator=iterator, 6155 condition=condition, 6156 ) 6157 6158 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6159 if self._match(TokenType.HEREDOC_STRING): 6160 return self.expression(exp.Heredoc, this=self._prev.text) 6161 6162 if not self._match_text_seq("$"): 6163 return None 6164 6165 tags = ["$"] 6166 tag_text = None 6167 6168 if self._is_connected(): 6169 self._advance() 6170 tags.append(self._prev.text.upper()) 6171 else: 6172 self.raise_error("No closing $ found") 6173 6174 if tags[-1] != "$": 6175 if self._is_connected() and self._match_text_seq("$"): 6176 tag_text = tags[-1] 6177 tags.append("$") 6178 else: 6179 self.raise_error("No closing $ found") 6180 6181 heredoc_start = self._curr 6182 6183 while self._curr: 6184 if self._match_text_seq(*tags, advance=False): 6185 this = self._find_sql(heredoc_start, self._prev) 6186 self._advance(len(tags)) 6187 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6188 6189 self._advance() 6190 6191 self.raise_error(f"No closing {''.join(tags)} found") 6192 return None 6193 6194 def _find_parser( 6195 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6196 ) -> t.Optional[t.Callable]: 6197 if not self._curr: 6198 return None 6199 6200 index = self._index 6201 this = [] 6202 while True: 6203 # The current token might be multiple words 6204 curr = self._curr.text.upper() 6205 key = curr.split(" ") 6206 this.append(curr) 6207 6208 self._advance() 6209 result, trie = in_trie(trie, key) 6210 if result == 
TrieResult.FAILED: 6211 break 6212 6213 if result == TrieResult.EXISTS: 6214 subparser = parsers[" ".join(this)] 6215 return subparser 6216 6217 self._retreat(index) 6218 return None 6219 6220 def _match(self, token_type, advance=True, expression=None): 6221 if not self._curr: 6222 return None 6223 6224 if self._curr.token_type == token_type: 6225 if advance: 6226 self._advance() 6227 self._add_comments(expression) 6228 return True 6229 6230 return None 6231 6232 def _match_set(self, types, advance=True): 6233 if not self._curr: 6234 return None 6235 6236 if self._curr.token_type in types: 6237 if advance: 6238 self._advance() 6239 return True 6240 6241 return None 6242 6243 def _match_pair(self, token_type_a, token_type_b, advance=True): 6244 if not self._curr or not self._next: 6245 return None 6246 6247 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6248 if advance: 6249 self._advance(2) 6250 return True 6251 6252 return None 6253 6254 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6255 if not self._match(TokenType.L_PAREN, expression=expression): 6256 self.raise_error("Expecting (") 6257 6258 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6259 if not self._match(TokenType.R_PAREN, expression=expression): 6260 self.raise_error("Expecting )") 6261 6262 def _match_texts(self, texts, advance=True): 6263 if self._curr and self._curr.text.upper() in texts: 6264 if advance: 6265 self._advance() 6266 return True 6267 return None 6268 6269 def _match_text_seq(self, *texts, advance=True): 6270 index = self._index 6271 for text in texts: 6272 if self._curr and self._curr.text.upper() == text: 6273 self._advance() 6274 else: 6275 self._retreat(index) 6276 return None 6277 6278 if not advance: 6279 self._retreat(index) 6280 6281 return True 6282 6283 def _replace_lambda( 6284 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6285 ) -> t.Optional[exp.Expression]: 6286 if not node: 6287 return node 6288 6289 for column in node.find_all(exp.Column): 6290 if column.parts[0].name in lambda_variables: 6291 dot_or_id = column.to_dot() if column.table else column.this 6292 parent = column.parent 6293 6294 while isinstance(parent, exp.Dot): 6295 if not isinstance(parent.parent, exp.Dot): 6296 parent.replace(dot_or_id) 6297 break 6298 parent = parent.parent 6299 else: 6300 if column is node: 6301 node = dot_or_id 6302 else: 6303 column.replace(dot_or_id) 6304 return node 6305 6306 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6307 start = self._prev 6308 6309 # Not to be confused with TRUNCATE(number, decimals) function call 6310 if self._match(TokenType.L_PAREN): 6311 self._retreat(self._index - 2) 6312 return self._parse_function() 6313 6314 # Clickhouse supports TRUNCATE DATABASE as well 6315 is_database = self._match(TokenType.DATABASE) 6316 6317 self._match(TokenType.TABLE) 6318 6319 exists = self._parse_exists(not_=False) 6320 6321 expressions = self._parse_csv( 6322 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6323 ) 6324 6325 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6326 6327 if self._match_text_seq("RESTART", "IDENTITY"): 6328 identity = "RESTART" 6329 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6330 identity = "CONTINUE" 6331 else: 6332 identity = None 6333 6334 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6335 option = self._prev.text 6336 else: 6337 
option = None 6338 6339 partition = self._parse_partition() 6340 6341 # Fallback case 6342 if self._curr: 6343 return self._parse_as_command(start) 6344 6345 return self.expression( 6346 exp.TruncateTable, 6347 expressions=expressions, 6348 is_database=is_database, 6349 exists=exists, 6350 cluster=cluster, 6351 identity=identity, 6352 option=option, 6353 partition=partition, 6354 ) 6355 6356 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6357 this = self._parse_ordered(self._parse_opclass) 6358 6359 if not self._match(TokenType.WITH): 6360 return this 6361 6362 op = self._parse_var(any_token=True) 6363 6364 return self.expression(exp.WithOperator, this=this, op=op) 6365 6366 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6367 opts = [] 6368 self._match(TokenType.EQ) 6369 self._match(TokenType.L_PAREN) 6370 while self._curr and not self._match(TokenType.R_PAREN): 6371 opts.append(self._parse_conjunction()) 6372 self._match(TokenType.COMMA) 6373 return opts 6374 6375 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6376 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 6377 6378 options = [] 6379 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6380 option = self._parse_unquoted_field() 6381 value = None 6382 6383 # Some options are defined as functions with the values as params 6384 if not isinstance(option, exp.Func): 6385 prev = self._prev.text.upper() 6386 # Different dialects might separate options and values by white space, "=" and "AS" 6387 self._match(TokenType.EQ) 6388 self._match(TokenType.ALIAS) 6389 6390 if prev == "FILE_FORMAT" and self._match(TokenType.L_PAREN): 6391 # Snowflake FILE_FORMAT case 6392 value = self._parse_wrapped_options() 6393 else: 6394 value = self._parse_unquoted_field() 6395 6396 param = self.expression(exp.CopyParameter, this=option, expression=value) 6397 options.append(param) 6398 6399 if sep: 6400 self._match(sep) 6401 6402 return options 6403 6404 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6405 expr = self.expression(exp.Credentials) 6406 6407 if self._match_text_seq("STORAGE_INTEGRATION", advance=False): 6408 expr.set("storage", self._parse_conjunction()) 6409 if self._match_text_seq("CREDENTIALS"): 6410 # Snowflake supports CREDENTIALS = (...), while Redshift CREDENTIALS <string> 6411 creds = ( 6412 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6413 ) 6414 expr.set("credentials", creds) 6415 if self._match_text_seq("ENCRYPTION"): 6416 expr.set("encryption", self._parse_wrapped_options()) 6417 if self._match_text_seq("IAM_ROLE"): 6418 expr.set("iam_role", self._parse_field()) 6419 if self._match_text_seq("REGION"): 6420 expr.set("region", self._parse_field()) 6421 6422 return expr 6423 6424 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6425 return self._parse_field() 6426 6427 def _parse_copy(self) -> exp.Copy | exp.Command: 6428 start = self._prev 6429 6430 self._match(TokenType.INTO) 6431 6432 this = ( 6433 self._parse_conjunction() 6434 if self._match(TokenType.L_PAREN, advance=False) 6435 else self._parse_table(schema=True) 6436 ) 6437 6438 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6439 6440 files = self._parse_csv(self._parse_file_location) 6441 credentials = self._parse_credentials() 6442 6443 self._match_text_seq("WITH") 6444 6445 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6446 6447 # Fallback case 6448 if self._curr: 6449 
return self._parse_as_command(start) 6450 6451 return self.expression( 6452 exp.Copy, 6453 this=this, 6454 kind=kind, 6455 credentials=credentials, 6456 files=files, 6457 params=params, 6458 )
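The _match family at the end of the listing is the parser's entire lookahead surface: _match consumes one token type, _match_set one of several, _match_pair two in a row, _match_text_seq a keyword sequence (retreating on partial matches), and _retreat rewinds to a saved _index. A minimal sketch of how a custom _parse_* method combines them; the FOO keyword, the _parse_foo name, and the use of exp.Anonymous as a stand-in node type are all invented for illustration:

    import typing as t

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import TokenType

    class CustomParser(Parser):
        def _parse_foo(self) -> t.Optional[exp.Expression]:
            # Save the position so we can rewind if this is not a FOO clause.
            index = self._index

            # _match_text_seq consumes "FOO" or leaves the cursor untouched.
            if not self._match_text_seq("FOO"):
                return None

            this = self._parse_id_var()
            if this is None:
                self._retreat(index)  # FOO was not followed by an identifier
                return None

            # Optional parenthesized, comma-separated argument list.
            args = []
            if self._match(TokenType.L_PAREN, advance=False):
                args = self._parse_wrapped_csv(self._parse_conjunction)

            # Build the node via self.expression so comments attach and
            # mandatory arguments are validated.
            return self.expression(exp.Anonymous, this="FOO", expressions=[this, *args])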
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1158 def __init__( 1159 self, 1160 error_level: t.Optional[ErrorLevel] = None, 1161 error_message_context: int = 100, 1162 max_errors: int = 3, 1163 dialect: DialectType = None, 1164 ): 1165 from sqlglot.dialects import Dialect 1166 1167 self.error_level = error_level or ErrorLevel.IMMEDIATE 1168 self.error_message_context = error_message_context 1169 self.max_errors = max_errors 1170 self.dialect = Dialect.get_or_raise(dialect) 1171 self.reset()
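For example, a parser that accumulates errors and raises them in one batch, rather than immediately, might be constructed as follows ("duckdb" is just an example dialect name):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    # Collect errors during parsing and raise at most 5 of them together.
    parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5, dialect="duckdb")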
1183 def parse( 1184 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1185 ) -> t.List[t.Optional[exp.Expression]]: 1186 """ 1187 Parses a list of tokens and returns a list of syntax trees, one tree 1188 per parsed SQL statement. 1189 1190 Args: 1191 raw_tokens: The list of tokens. 1192 sql: The original SQL string, used to produce helpful debug messages. 1193 1194 Returns: 1195 The list of the produced syntax trees. 1196 """ 1197 return self._parse( 1198 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1199 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
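A minimal end-to-end sketch: tokenize with the Tokenizer imported at the top of the module, then hand the token list to parse:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t; SELECT b FROM u"
    tokens = Tokenizer().tokenize(sql)

    # One syntax tree per statement; passing sql yields better error messages.
    trees = Parser().parse(tokens, sql=sql)
    assert len(trees) == 2
    print(trees[0].sql())  # SELECT a FROM t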
1201 def parse_into( 1202 self, 1203 expression_types: exp.IntoType, 1204 raw_tokens: t.List[Token], 1205 sql: t.Optional[str] = None, 1206 ) -> t.List[t.Optional[exp.Expression]]: 1207 """ 1208 Parses a list of tokens into a given Expression type. If a collection of Expression 1209 types is given instead, this method will try to parse the token list into each one 1210 of them, stopping at the first for which the parsing succeeds. 1211 1212 Args: 1213 expression_types: The expression type(s) to try and parse the token list into. 1214 raw_tokens: The list of tokens. 1215 sql: The original SQL string, used to produce helpful debug messages. 1216 1217 Returns: 1218 The target Expression. 1219 """ 1220 errors = [] 1221 for expression_type in ensure_list(expression_types): 1222 parser = self.EXPRESSION_PARSERS.get(expression_type) 1223 if not parser: 1224 raise TypeError(f"No parser registered for {expression_type}") 1225 1226 try: 1227 return self._parse(parser, raw_tokens, sql) 1228 except ParseError as e: 1229 e.errors[0]["into_expression"] = expression_type 1230 errors.append(e) 1231 1232 raise ParseError( 1233 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1234 errors=merge_errors(errors), 1235 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
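For instance, parsing directly into exp.Select succeeds for a SELECT statement, while a target type that cannot match should raise a ParseError whose errors are tagged with the attempted type (a usage sketch):

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1"
    select = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql=sql)[0]
    assert isinstance(select, exp.Select)

    # By contrast, something like
    #   Parser().parse_into(exp.From, Tokenizer().tokenize(sql), sql=sql)
    # is expected to raise ParseError, each error annotated via "into_expression".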
1275 def check_errors(self) -> None: 1276 """Logs or raises any found errors, depending on the chosen error level setting.""" 1277 if self.error_level == ErrorLevel.WARN: 1278 for error in self.errors: 1279 logger.error(str(error)) 1280 elif self.error_level == ErrorLevel.RAISE and self.errors: 1281 raise ParseError( 1282 concat_messages(self.errors, self.max_errors), 1283 errors=merge_errors(self.errors), 1284 )
Logs or raises any found errors, depending on the chosen error level setting.
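With ErrorLevel.WARN, for example, a malformed statement still yields a best-effort tree while check_errors logs what went wrong. A sketch, assuming the trailing operator leaves a mandatory argument unset:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1 +"  # incomplete binary expression
    parser = Parser(error_level=ErrorLevel.WARN)

    # check_errors runs internally after parsing: errors are logged, not raised.
    trees = parser.parse(Tokenizer().tokenize(sql), sql=sql)
    print(parser.errors)  # the recorded ParseError objects remain inspectable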
1286 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1287 """ 1288 Appends an error in the list of recorded errors or raises it, depending on the chosen 1289 error level setting. 1290 """ 1291 token = token or self._curr or self._prev or Token.string("") 1292 start = token.start 1293 end = token.end + 1 1294 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1295 highlight = self.sql[start:end] 1296 end_context = self.sql[end : end + self.error_message_context] 1297 1298 error = ParseError.new( 1299 f"{message}. Line {token.line}, Col: {token.col}.\n" 1300 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1301 description=message, 1302 line=token.line, 1303 col=token.col, 1304 start_context=start_context, 1305 highlight=highlight, 1306 end_context=end_context, 1307 ) 1308 1309 if self.error_level == ErrorLevel.IMMEDIATE: 1310 raise error 1311 1312 self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
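The structured fields passed to ParseError.new above survive on the raised exception, so callers can recover the position programmatically. For example:

    from sqlglot.errors import ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT * FROM (SELECT 1"  # unbalanced parenthesis
    try:
        Parser().parse(Tokenizer().tokenize(sql), sql=sql)
    except ParseError as e:
        err = e.errors[0]  # dicts with description, line, col, highlight, ...
        print(err["description"], err["line"], err["col"])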
1314 def expression( 1315 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1316 ) -> E: 1317 """ 1318 Creates a new, validated Expression. 1319 1320 Args: 1321 exp_class: The expression class to instantiate. 1322 comments: An optional list of comments to attach to the expression. 1323 kwargs: The arguments to set for the expression along with their respective values. 1324 1325 Returns: 1326 The target expression. 1327 """ 1328 instance = exp_class(**kwargs) 1329 instance.add_comments(comments) if comments else self._add_comments(instance) 1330 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
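Within the parser, nodes are built through this method rather than instantiated directly, so that dangling comments are attached and mandatory arguments are checked. A hypothetical helper in the style of the _parse_* methods above (the method name is invented for illustration):

    from sqlglot import exp
    from sqlglot.tokens import TokenType

    # Hypothetical method on a Parser subclass, for illustration only.
    def _parse_assignment_sketch(self, this):
        if self._match(TokenType.EQ):
            # self.expression attaches pending comments and validates the node.
            return self.expression(exp.EQ, this=this, expression=self._parse_conjunction())
        return this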
1337 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1338 """ 1339 Validates an Expression, making sure that all its mandatory arguments are set. 1340 1341 Args: 1342 expression: The expression to validate. 1343 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1344 1345 Returns: 1346 The validated expression. 1347 """ 1348 if self.error_level != ErrorLevel.IGNORE: 1349 for error_message in expression.error_messages(args): 1350 self.raise_error(error_message) 1351 1352 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
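For example, with a non-IGNORE error level, an expression missing a mandatory argument is reported through raise_error. A sketch, assuming exp.EQ declares both operands as mandatory, as Binary nodes do:

    from sqlglot import exp
    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    parser = Parser(error_level=ErrorLevel.WARN)

    incomplete = exp.EQ(this=exp.column("a"))  # "expression" left unset
    parser.validate_expression(incomplete)

    print(parser.errors)  # one "Required keyword: 'expression' missing ..." entry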