# sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E, Lit 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18 T = t.TypeVar("T") 19 20logger = logging.getLogger("sqlglot") 21 22OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 23 24 25def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 26 if len(args) == 1 and args[0].is_star: 27 return exp.StarMap(this=args[0]) 28 29 keys = [] 30 values = [] 31 for i in range(0, len(args), 2): 32 keys.append(args[i]) 33 values.append(args[i + 1]) 34 35 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 36 37 38def build_like(args: t.List) -> exp.Escape | exp.Like: 39 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 40 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 41 42 43def binary_range_parser( 44 expr_type: t.Type[exp.Expression], 45) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 46 return lambda self, this: self._parse_escape( 47 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 48 ) 49 50 51def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 52 # Default argument order is base, expression 53 this = seq_get(args, 0) 54 expression = seq_get(args, 1) 55 56 if expression: 57 if not dialect.LOG_BASE_FIRST: 58 this, expression = expression, this 59 return exp.Log(this=this, expression=expression) 60 61 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 
62 63 64def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 65 def _builder(args: t.List, dialect: Dialect) -> E: 66 expression = expr_type( 67 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 68 ) 69 if len(args) > 2 and expr_type is exp.JSONExtract: 70 expression.set("expressions", args[2:]) 71 72 return expression 73 74 return _builder 75 76 77class _Parser(type): 78 def __new__(cls, clsname, bases, attrs): 79 klass = super().__new__(cls, clsname, bases, attrs) 80 81 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 82 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 83 84 return klass 85 86 87class Parser(metaclass=_Parser): 88 """ 89 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 90 91 Args: 92 error_level: The desired error level. 93 Default: ErrorLevel.IMMEDIATE 94 error_message_context: The amount of context to capture from a query string when displaying 95 the error message (in number of characters). 96 Default: 100 97 max_errors: Maximum number of error messages to include in a raised ParseError. 98 This is only relevant if error_level is ErrorLevel.RAISE. 
99 Default: 3 100 """ 101 102 FUNCTIONS: t.Dict[str, t.Callable] = { 103 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 104 "CONCAT": lambda args, dialect: exp.Concat( 105 expressions=args, 106 safe=not dialect.STRICT_STRING_CONCAT, 107 coalesce=dialect.CONCAT_COALESCE, 108 ), 109 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 110 expressions=args, 111 safe=not dialect.STRICT_STRING_CONCAT, 112 coalesce=dialect.CONCAT_COALESCE, 113 ), 114 "DATE_TO_DATE_STR": lambda args: exp.Cast( 115 this=seq_get(args, 0), 116 to=exp.DataType(this=exp.DataType.Type.TEXT), 117 ), 118 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 119 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 120 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 121 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 122 "LIKE": build_like, 123 "LOG": build_logarithm, 124 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 125 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 126 "MOD": lambda args: exp.Mod(this=seq_get(args, 0), expression=seq_get(args, 1)), 127 "TIME_TO_TIME_STR": lambda args: exp.Cast( 128 this=seq_get(args, 0), 129 to=exp.DataType(this=exp.DataType.Type.TEXT), 130 ), 131 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 132 this=exp.Cast( 133 this=seq_get(args, 0), 134 to=exp.DataType(this=exp.DataType.Type.TEXT), 135 ), 136 start=exp.Literal.number(1), 137 length=exp.Literal.number(10), 138 ), 139 "VAR_MAP": build_var_map, 140 } 141 142 NO_PAREN_FUNCTIONS = { 143 TokenType.CURRENT_DATE: exp.CurrentDate, 144 TokenType.CURRENT_DATETIME: exp.CurrentDate, 145 TokenType.CURRENT_TIME: exp.CurrentTime, 146 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 147 TokenType.CURRENT_USER: exp.CurrentUser, 148 } 149 150 STRUCT_TYPE_TOKENS = { 151 TokenType.NESTED, 152 TokenType.OBJECT, 153 
TokenType.STRUCT, 154 } 155 156 NESTED_TYPE_TOKENS = { 157 TokenType.ARRAY, 158 TokenType.LOWCARDINALITY, 159 TokenType.MAP, 160 TokenType.NULLABLE, 161 *STRUCT_TYPE_TOKENS, 162 } 163 164 ENUM_TYPE_TOKENS = { 165 TokenType.ENUM, 166 TokenType.ENUM8, 167 TokenType.ENUM16, 168 } 169 170 AGGREGATE_TYPE_TOKENS = { 171 TokenType.AGGREGATEFUNCTION, 172 TokenType.SIMPLEAGGREGATEFUNCTION, 173 } 174 175 TYPE_TOKENS = { 176 TokenType.BIT, 177 TokenType.BOOLEAN, 178 TokenType.TINYINT, 179 TokenType.UTINYINT, 180 TokenType.SMALLINT, 181 TokenType.USMALLINT, 182 TokenType.INT, 183 TokenType.UINT, 184 TokenType.BIGINT, 185 TokenType.UBIGINT, 186 TokenType.INT128, 187 TokenType.UINT128, 188 TokenType.INT256, 189 TokenType.UINT256, 190 TokenType.MEDIUMINT, 191 TokenType.UMEDIUMINT, 192 TokenType.FIXEDSTRING, 193 TokenType.FLOAT, 194 TokenType.DOUBLE, 195 TokenType.CHAR, 196 TokenType.NCHAR, 197 TokenType.VARCHAR, 198 TokenType.NVARCHAR, 199 TokenType.BPCHAR, 200 TokenType.TEXT, 201 TokenType.MEDIUMTEXT, 202 TokenType.LONGTEXT, 203 TokenType.MEDIUMBLOB, 204 TokenType.LONGBLOB, 205 TokenType.BINARY, 206 TokenType.VARBINARY, 207 TokenType.JSON, 208 TokenType.JSONB, 209 TokenType.INTERVAL, 210 TokenType.TINYBLOB, 211 TokenType.TINYTEXT, 212 TokenType.TIME, 213 TokenType.TIMETZ, 214 TokenType.TIMESTAMP, 215 TokenType.TIMESTAMP_S, 216 TokenType.TIMESTAMP_MS, 217 TokenType.TIMESTAMP_NS, 218 TokenType.TIMESTAMPTZ, 219 TokenType.TIMESTAMPLTZ, 220 TokenType.DATETIME, 221 TokenType.DATETIME64, 222 TokenType.DATE, 223 TokenType.DATE32, 224 TokenType.INT4RANGE, 225 TokenType.INT4MULTIRANGE, 226 TokenType.INT8RANGE, 227 TokenType.INT8MULTIRANGE, 228 TokenType.NUMRANGE, 229 TokenType.NUMMULTIRANGE, 230 TokenType.TSRANGE, 231 TokenType.TSMULTIRANGE, 232 TokenType.TSTZRANGE, 233 TokenType.TSTZMULTIRANGE, 234 TokenType.DATERANGE, 235 TokenType.DATEMULTIRANGE, 236 TokenType.DECIMAL, 237 TokenType.UDECIMAL, 238 TokenType.BIGDECIMAL, 239 TokenType.UUID, 240 TokenType.GEOGRAPHY, 241 TokenType.GEOMETRY, 
242 TokenType.HLLSKETCH, 243 TokenType.HSTORE, 244 TokenType.PSEUDO_TYPE, 245 TokenType.SUPER, 246 TokenType.SERIAL, 247 TokenType.SMALLSERIAL, 248 TokenType.BIGSERIAL, 249 TokenType.XML, 250 TokenType.YEAR, 251 TokenType.UNIQUEIDENTIFIER, 252 TokenType.USERDEFINED, 253 TokenType.MONEY, 254 TokenType.SMALLMONEY, 255 TokenType.ROWVERSION, 256 TokenType.IMAGE, 257 TokenType.VARIANT, 258 TokenType.OBJECT, 259 TokenType.OBJECT_IDENTIFIER, 260 TokenType.INET, 261 TokenType.IPADDRESS, 262 TokenType.IPPREFIX, 263 TokenType.IPV4, 264 TokenType.IPV6, 265 TokenType.UNKNOWN, 266 TokenType.NULL, 267 TokenType.NAME, 268 *ENUM_TYPE_TOKENS, 269 *NESTED_TYPE_TOKENS, 270 *AGGREGATE_TYPE_TOKENS, 271 } 272 273 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 274 TokenType.BIGINT: TokenType.UBIGINT, 275 TokenType.INT: TokenType.UINT, 276 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 277 TokenType.SMALLINT: TokenType.USMALLINT, 278 TokenType.TINYINT: TokenType.UTINYINT, 279 TokenType.DECIMAL: TokenType.UDECIMAL, 280 } 281 282 SUBQUERY_PREDICATES = { 283 TokenType.ANY: exp.Any, 284 TokenType.ALL: exp.All, 285 TokenType.EXISTS: exp.Exists, 286 TokenType.SOME: exp.Any, 287 } 288 289 RESERVED_TOKENS = { 290 *Tokenizer.SINGLE_TOKENS.values(), 291 TokenType.SELECT, 292 } 293 294 DB_CREATABLES = { 295 TokenType.DATABASE, 296 TokenType.SCHEMA, 297 TokenType.TABLE, 298 TokenType.VIEW, 299 TokenType.MODEL, 300 TokenType.DICTIONARY, 301 TokenType.SEQUENCE, 302 TokenType.STORAGE_INTEGRATION, 303 } 304 305 CREATABLES = { 306 TokenType.COLUMN, 307 TokenType.CONSTRAINT, 308 TokenType.FUNCTION, 309 TokenType.INDEX, 310 TokenType.PROCEDURE, 311 TokenType.FOREIGN_KEY, 312 *DB_CREATABLES, 313 } 314 315 # Tokens that can represent identifiers 316 ID_VAR_TOKENS = { 317 TokenType.VAR, 318 TokenType.ANTI, 319 TokenType.APPLY, 320 TokenType.ASC, 321 TokenType.ASOF, 322 TokenType.AUTO_INCREMENT, 323 TokenType.BEGIN, 324 TokenType.BPCHAR, 325 TokenType.CACHE, 326 TokenType.CASE, 327 TokenType.COLLATE, 328 TokenType.COMMAND, 329 
TokenType.COMMENT, 330 TokenType.COMMIT, 331 TokenType.CONSTRAINT, 332 TokenType.DEFAULT, 333 TokenType.DELETE, 334 TokenType.DESC, 335 TokenType.DESCRIBE, 336 TokenType.DICTIONARY, 337 TokenType.DIV, 338 TokenType.END, 339 TokenType.EXECUTE, 340 TokenType.ESCAPE, 341 TokenType.FALSE, 342 TokenType.FIRST, 343 TokenType.FILTER, 344 TokenType.FINAL, 345 TokenType.FORMAT, 346 TokenType.FULL, 347 TokenType.IS, 348 TokenType.ISNULL, 349 TokenType.INTERVAL, 350 TokenType.KEEP, 351 TokenType.KILL, 352 TokenType.LEFT, 353 TokenType.LOAD, 354 TokenType.MERGE, 355 TokenType.NATURAL, 356 TokenType.NEXT, 357 TokenType.OFFSET, 358 TokenType.OPERATOR, 359 TokenType.ORDINALITY, 360 TokenType.OVERLAPS, 361 TokenType.OVERWRITE, 362 TokenType.PARTITION, 363 TokenType.PERCENT, 364 TokenType.PIVOT, 365 TokenType.PRAGMA, 366 TokenType.RANGE, 367 TokenType.RECURSIVE, 368 TokenType.REFERENCES, 369 TokenType.REFRESH, 370 TokenType.REPLACE, 371 TokenType.RIGHT, 372 TokenType.ROW, 373 TokenType.ROWS, 374 TokenType.SEMI, 375 TokenType.SET, 376 TokenType.SETTINGS, 377 TokenType.SHOW, 378 TokenType.TEMPORARY, 379 TokenType.TOP, 380 TokenType.TRUE, 381 TokenType.TRUNCATE, 382 TokenType.UNIQUE, 383 TokenType.UNPIVOT, 384 TokenType.UPDATE, 385 TokenType.USE, 386 TokenType.VOLATILE, 387 TokenType.WINDOW, 388 *CREATABLES, 389 *SUBQUERY_PREDICATES, 390 *TYPE_TOKENS, 391 *NO_PAREN_FUNCTIONS, 392 } 393 394 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 395 396 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 397 TokenType.ANTI, 398 TokenType.APPLY, 399 TokenType.ASOF, 400 TokenType.FULL, 401 TokenType.LEFT, 402 TokenType.LOCK, 403 TokenType.NATURAL, 404 TokenType.OFFSET, 405 TokenType.RIGHT, 406 TokenType.SEMI, 407 TokenType.WINDOW, 408 } 409 410 ALIAS_TOKENS = ID_VAR_TOKENS 411 412 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 413 414 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 415 416 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 417 418 FUNC_TOKENS = { 419 TokenType.COLLATE, 
420 TokenType.COMMAND, 421 TokenType.CURRENT_DATE, 422 TokenType.CURRENT_DATETIME, 423 TokenType.CURRENT_TIMESTAMP, 424 TokenType.CURRENT_TIME, 425 TokenType.CURRENT_USER, 426 TokenType.FILTER, 427 TokenType.FIRST, 428 TokenType.FORMAT, 429 TokenType.GLOB, 430 TokenType.IDENTIFIER, 431 TokenType.INDEX, 432 TokenType.ISNULL, 433 TokenType.ILIKE, 434 TokenType.INSERT, 435 TokenType.LIKE, 436 TokenType.MERGE, 437 TokenType.OFFSET, 438 TokenType.PRIMARY_KEY, 439 TokenType.RANGE, 440 TokenType.REPLACE, 441 TokenType.RLIKE, 442 TokenType.ROW, 443 TokenType.UNNEST, 444 TokenType.VAR, 445 TokenType.LEFT, 446 TokenType.RIGHT, 447 TokenType.SEQUENCE, 448 TokenType.DATE, 449 TokenType.DATETIME, 450 TokenType.TABLE, 451 TokenType.TIMESTAMP, 452 TokenType.TIMESTAMPTZ, 453 TokenType.TRUNCATE, 454 TokenType.WINDOW, 455 TokenType.XOR, 456 *TYPE_TOKENS, 457 *SUBQUERY_PREDICATES, 458 } 459 460 CONJUNCTION = { 461 TokenType.AND: exp.And, 462 TokenType.OR: exp.Or, 463 } 464 465 EQUALITY = { 466 TokenType.COLON_EQ: exp.PropertyEQ, 467 TokenType.EQ: exp.EQ, 468 TokenType.NEQ: exp.NEQ, 469 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 470 } 471 472 COMPARISON = { 473 TokenType.GT: exp.GT, 474 TokenType.GTE: exp.GTE, 475 TokenType.LT: exp.LT, 476 TokenType.LTE: exp.LTE, 477 } 478 479 BITWISE = { 480 TokenType.AMP: exp.BitwiseAnd, 481 TokenType.CARET: exp.BitwiseXor, 482 TokenType.PIPE: exp.BitwiseOr, 483 } 484 485 TERM = { 486 TokenType.DASH: exp.Sub, 487 TokenType.PLUS: exp.Add, 488 TokenType.MOD: exp.Mod, 489 TokenType.COLLATE: exp.Collate, 490 } 491 492 FACTOR = { 493 TokenType.DIV: exp.IntDiv, 494 TokenType.LR_ARROW: exp.Distance, 495 TokenType.SLASH: exp.Div, 496 TokenType.STAR: exp.Mul, 497 } 498 499 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 500 501 TIMES = { 502 TokenType.TIME, 503 TokenType.TIMETZ, 504 } 505 506 TIMESTAMPS = { 507 TokenType.TIMESTAMP, 508 TokenType.TIMESTAMPTZ, 509 TokenType.TIMESTAMPLTZ, 510 *TIMES, 511 } 512 513 SET_OPERATIONS = { 514 TokenType.UNION, 
515 TokenType.INTERSECT, 516 TokenType.EXCEPT, 517 } 518 519 JOIN_METHODS = { 520 TokenType.ASOF, 521 TokenType.NATURAL, 522 TokenType.POSITIONAL, 523 } 524 525 JOIN_SIDES = { 526 TokenType.LEFT, 527 TokenType.RIGHT, 528 TokenType.FULL, 529 } 530 531 JOIN_KINDS = { 532 TokenType.INNER, 533 TokenType.OUTER, 534 TokenType.CROSS, 535 TokenType.SEMI, 536 TokenType.ANTI, 537 } 538 539 JOIN_HINTS: t.Set[str] = set() 540 541 LAMBDAS = { 542 TokenType.ARROW: lambda self, expressions: self.expression( 543 exp.Lambda, 544 this=self._replace_lambda( 545 self._parse_conjunction(), 546 {node.name for node in expressions}, 547 ), 548 expressions=expressions, 549 ), 550 TokenType.FARROW: lambda self, expressions: self.expression( 551 exp.Kwarg, 552 this=exp.var(expressions[0].name), 553 expression=self._parse_conjunction(), 554 ), 555 } 556 557 COLUMN_OPERATORS = { 558 TokenType.DOT: None, 559 TokenType.DCOLON: lambda self, this, to: self.expression( 560 exp.Cast if self.STRICT_CAST else exp.TryCast, 561 this=this, 562 to=to, 563 ), 564 TokenType.ARROW: lambda self, this, path: self.expression( 565 exp.JSONExtract, 566 this=this, 567 expression=self.dialect.to_json_path(path), 568 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 569 ), 570 TokenType.DARROW: lambda self, this, path: self.expression( 571 exp.JSONExtractScalar, 572 this=this, 573 expression=self.dialect.to_json_path(path), 574 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 575 ), 576 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 577 exp.JSONBExtract, 578 this=this, 579 expression=path, 580 ), 581 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 582 exp.JSONBExtractScalar, 583 this=this, 584 expression=path, 585 ), 586 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 587 exp.JSONBContains, 588 this=this, 589 expression=key, 590 ), 591 } 592 593 EXPRESSION_PARSERS = { 594 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 595 
exp.Column: lambda self: self._parse_column(), 596 exp.Condition: lambda self: self._parse_conjunction(), 597 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 598 exp.Expression: lambda self: self._parse_expression(), 599 exp.From: lambda self: self._parse_from(), 600 exp.Group: lambda self: self._parse_group(), 601 exp.Having: lambda self: self._parse_having(), 602 exp.Identifier: lambda self: self._parse_id_var(), 603 exp.Join: lambda self: self._parse_join(), 604 exp.Lambda: lambda self: self._parse_lambda(), 605 exp.Lateral: lambda self: self._parse_lateral(), 606 exp.Limit: lambda self: self._parse_limit(), 607 exp.Offset: lambda self: self._parse_offset(), 608 exp.Order: lambda self: self._parse_order(), 609 exp.Ordered: lambda self: self._parse_ordered(), 610 exp.Properties: lambda self: self._parse_properties(), 611 exp.Qualify: lambda self: self._parse_qualify(), 612 exp.Returning: lambda self: self._parse_returning(), 613 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 614 exp.Table: lambda self: self._parse_table_parts(), 615 exp.TableAlias: lambda self: self._parse_table_alias(), 616 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 617 exp.Where: lambda self: self._parse_where(), 618 exp.Window: lambda self: self._parse_named_window(), 619 exp.With: lambda self: self._parse_with(), 620 "JOIN_TYPE": lambda self: self._parse_join_parts(), 621 } 622 623 STATEMENT_PARSERS = { 624 TokenType.ALTER: lambda self: self._parse_alter(), 625 TokenType.BEGIN: lambda self: self._parse_transaction(), 626 TokenType.CACHE: lambda self: self._parse_cache(), 627 TokenType.COMMENT: lambda self: self._parse_comment(), 628 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 629 TokenType.CREATE: lambda self: self._parse_create(), 630 TokenType.DELETE: lambda self: self._parse_delete(), 631 TokenType.DESC: lambda self: self._parse_describe(), 632 TokenType.DESCRIBE: lambda self: self._parse_describe(), 633 
TokenType.DROP: lambda self: self._parse_drop(), 634 TokenType.INSERT: lambda self: self._parse_insert(), 635 TokenType.KILL: lambda self: self._parse_kill(), 636 TokenType.LOAD: lambda self: self._parse_load(), 637 TokenType.MERGE: lambda self: self._parse_merge(), 638 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 639 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 640 TokenType.REFRESH: lambda self: self._parse_refresh(), 641 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 642 TokenType.SET: lambda self: self._parse_set(), 643 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 644 TokenType.UNCACHE: lambda self: self._parse_uncache(), 645 TokenType.UPDATE: lambda self: self._parse_update(), 646 TokenType.USE: lambda self: self.expression( 647 exp.Use, 648 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 649 this=self._parse_table(schema=False), 650 ), 651 } 652 653 UNARY_PARSERS = { 654 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 655 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 656 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 657 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 658 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 659 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 660 } 661 662 STRING_PARSERS = { 663 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 664 exp.RawString, this=token.text 665 ), 666 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 667 exp.National, this=token.text 668 ), 669 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 670 TokenType.STRING: lambda self, token: self.expression( 671 exp.Literal, this=token.text, 
is_string=True 672 ), 673 TokenType.UNICODE_STRING: lambda self, token: self.expression( 674 exp.UnicodeString, 675 this=token.text, 676 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 677 ), 678 } 679 680 NUMERIC_PARSERS = { 681 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 682 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 683 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 684 TokenType.NUMBER: lambda self, token: self.expression( 685 exp.Literal, this=token.text, is_string=False 686 ), 687 } 688 689 PRIMARY_PARSERS = { 690 **STRING_PARSERS, 691 **NUMERIC_PARSERS, 692 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 693 TokenType.NULL: lambda self, _: self.expression(exp.Null), 694 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 695 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 696 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 697 TokenType.STAR: lambda self, _: self.expression( 698 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 699 ), 700 } 701 702 PLACEHOLDER_PARSERS = { 703 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 704 TokenType.PARAMETER: lambda self: self._parse_parameter(), 705 TokenType.COLON: lambda self: ( 706 self.expression(exp.Placeholder, this=self._prev.text) 707 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 708 else None 709 ), 710 } 711 712 RANGE_PARSERS = { 713 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 714 TokenType.GLOB: binary_range_parser(exp.Glob), 715 TokenType.ILIKE: binary_range_parser(exp.ILike), 716 TokenType.IN: lambda self, this: self._parse_in(this), 717 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 718 TokenType.IS: lambda self, this: self._parse_is(this), 719 
TokenType.LIKE: binary_range_parser(exp.Like), 720 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 721 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 722 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 723 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 724 } 725 726 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 727 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 728 "AUTO": lambda self: self._parse_auto_property(), 729 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 730 "BACKUP": lambda self: self.expression( 731 exp.BackupProperty, this=self._parse_var(any_token=True) 732 ), 733 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 734 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 735 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 736 "CHECKSUM": lambda self: self._parse_checksum(), 737 "CLUSTER BY": lambda self: self._parse_cluster(), 738 "CLUSTERED": lambda self: self._parse_clustered_by(), 739 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 740 exp.CollateProperty, **kwargs 741 ), 742 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 743 "CONTAINS": lambda self: self._parse_contains_property(), 744 "COPY": lambda self: self._parse_copy_property(), 745 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 746 "DEFINER": lambda self: self._parse_definer(), 747 "DETERMINISTIC": lambda self: self.expression( 748 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 749 ), 750 "DISTKEY": lambda self: self._parse_distkey(), 751 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 752 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 753 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 754 "EXTERNAL": lambda self: 
self.expression(exp.ExternalProperty), 755 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 756 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 757 "FREESPACE": lambda self: self._parse_freespace(), 758 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 759 "HEAP": lambda self: self.expression(exp.HeapProperty), 760 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 761 "IMMUTABLE": lambda self: self.expression( 762 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 763 ), 764 "INHERITS": lambda self: self.expression( 765 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 766 ), 767 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 768 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 769 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 770 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 771 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 772 "LIKE": lambda self: self._parse_create_like(), 773 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 774 "LOCK": lambda self: self._parse_locking(), 775 "LOCKING": lambda self: self._parse_locking(), 776 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 777 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 778 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 779 "MODIFIES": lambda self: self._parse_modifies_property(), 780 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 781 "NO": lambda self: self._parse_no_property(), 782 "ON": lambda self: self._parse_on_property(), 783 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 784 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 785 "PARTITION": lambda self: self._parse_partitioned_of(), 
786 "PARTITION BY": lambda self: self._parse_partitioned_by(), 787 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 788 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 789 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 790 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 791 "READS": lambda self: self._parse_reads_property(), 792 "REMOTE": lambda self: self._parse_remote_with_connection(), 793 "RETURNS": lambda self: self._parse_returns(), 794 "ROW": lambda self: self._parse_row(), 795 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 796 "SAMPLE": lambda self: self.expression( 797 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 798 ), 799 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 800 "SETTINGS": lambda self: self.expression( 801 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 802 ), 803 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 804 "SORTKEY": lambda self: self._parse_sortkey(), 805 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 806 "STABLE": lambda self: self.expression( 807 exp.StabilityProperty, this=exp.Literal.string("STABLE") 808 ), 809 "STORED": lambda self: self._parse_stored(), 810 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 811 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 812 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 813 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 814 "TO": lambda self: self._parse_to_table(), 815 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 816 "TRANSFORM": lambda self: self.expression( 817 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 818 ), 819 "TTL": lambda self: self._parse_ttl(), 820 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 
821 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 822 "VOLATILE": lambda self: self._parse_volatile_property(), 823 "WITH": lambda self: self._parse_with_property(), 824 } 825 826 CONSTRAINT_PARSERS = { 827 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 828 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 829 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 830 "CHARACTER SET": lambda self: self.expression( 831 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 832 ), 833 "CHECK": lambda self: self.expression( 834 exp.CheckColumnConstraint, 835 this=self._parse_wrapped(self._parse_conjunction), 836 enforced=self._match_text_seq("ENFORCED"), 837 ), 838 "COLLATE": lambda self: self.expression( 839 exp.CollateColumnConstraint, this=self._parse_var() 840 ), 841 "COMMENT": lambda self: self.expression( 842 exp.CommentColumnConstraint, this=self._parse_string() 843 ), 844 "COMPRESS": lambda self: self._parse_compress(), 845 "CLUSTERED": lambda self: self.expression( 846 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 847 ), 848 "NONCLUSTERED": lambda self: self.expression( 849 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 850 ), 851 "DEFAULT": lambda self: self.expression( 852 exp.DefaultColumnConstraint, this=self._parse_bitwise() 853 ), 854 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 855 "EXCLUDE": lambda self: self.expression( 856 exp.ExcludeColumnConstraint, this=self._parse_index_params() 857 ), 858 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 859 "FORMAT": lambda self: self.expression( 860 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 861 ), 862 "GENERATED": lambda self: self._parse_generated_as_identity(), 863 "IDENTITY": lambda self: self._parse_auto_increment(), 864 "INLINE": lambda self: self._parse_inline(), 865 
"LIKE": lambda self: self._parse_create_like(), 866 "NOT": lambda self: self._parse_not_constraint(), 867 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 868 "ON": lambda self: ( 869 self._match(TokenType.UPDATE) 870 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 871 ) 872 or self.expression(exp.OnProperty, this=self._parse_id_var()), 873 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 874 "PERIOD": lambda self: self._parse_period_for_system_time(), 875 "PRIMARY KEY": lambda self: self._parse_primary_key(), 876 "REFERENCES": lambda self: self._parse_references(match=False), 877 "TITLE": lambda self: self.expression( 878 exp.TitleColumnConstraint, this=self._parse_var_or_string() 879 ), 880 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 881 "UNIQUE": lambda self: self._parse_unique(), 882 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 883 "WITH": lambda self: self.expression( 884 exp.Properties, expressions=self._parse_wrapped_properties() 885 ), 886 } 887 888 ALTER_PARSERS = { 889 "ADD": lambda self: self._parse_alter_table_add(), 890 "ALTER": lambda self: self._parse_alter_table_alter(), 891 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 892 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 893 "DROP": lambda self: self._parse_alter_table_drop(), 894 "RENAME": lambda self: self._parse_alter_table_rename(), 895 } 896 897 SCHEMA_UNNAMED_CONSTRAINTS = { 898 "CHECK", 899 "EXCLUDE", 900 "FOREIGN KEY", 901 "LIKE", 902 "PERIOD", 903 "PRIMARY KEY", 904 "UNIQUE", 905 } 906 907 NO_PAREN_FUNCTION_PARSERS = { 908 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 909 "CASE": lambda self: self._parse_case(), 910 "IF": lambda self: self._parse_if(), 911 "NEXT": lambda self: self._parse_next_value_for(), 912 } 913 914 
INVALID_FUNC_NAME_TOKENS = { 915 TokenType.IDENTIFIER, 916 TokenType.STRING, 917 } 918 919 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 920 921 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 922 923 FUNCTION_PARSERS = { 924 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 925 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 926 "DECODE": lambda self: self._parse_decode(), 927 "EXTRACT": lambda self: self._parse_extract(), 928 "JSON_OBJECT": lambda self: self._parse_json_object(), 929 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 930 "JSON_TABLE": lambda self: self._parse_json_table(), 931 "MATCH": lambda self: self._parse_match_against(), 932 "OPENJSON": lambda self: self._parse_open_json(), 933 "POSITION": lambda self: self._parse_position(), 934 "PREDICT": lambda self: self._parse_predict(), 935 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 936 "STRING_AGG": lambda self: self._parse_string_agg(), 937 "SUBSTRING": lambda self: self._parse_substring(), 938 "TRIM": lambda self: self._parse_trim(), 939 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 940 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 941 } 942 943 QUERY_MODIFIER_PARSERS = { 944 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 945 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 946 TokenType.WHERE: lambda self: ("where", self._parse_where()), 947 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 948 TokenType.HAVING: lambda self: ("having", self._parse_having()), 949 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 950 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 951 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 952 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 953 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 954 
TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 955 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 956 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 957 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 958 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 959 TokenType.CLUSTER_BY: lambda self: ( 960 "cluster", 961 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 962 ), 963 TokenType.DISTRIBUTE_BY: lambda self: ( 964 "distribute", 965 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 966 ), 967 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 968 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 969 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 970 } 971 972 SET_PARSERS = { 973 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 974 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 975 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 976 "TRANSACTION": lambda self: self._parse_set_transaction(), 977 } 978 979 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 980 981 TYPE_LITERAL_PARSERS = { 982 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 983 } 984 985 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 986 987 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 988 989 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 990 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 991 "ISOLATION": ( 992 ("LEVEL", "REPEATABLE", "READ"), 993 ("LEVEL", "READ", "COMMITTED"), 994 ("LEVEL", "READ", "UNCOMITTED"), 995 ("LEVEL", "SERIALIZABLE"), 996 ), 997 "READ": ("WRITE", "ONLY"), 998 } 999 1000 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1001 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", 
"UPDATE"), tuple() 1002 ) 1003 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1004 1005 CREATE_SEQUENCE: OPTIONS_TYPE = { 1006 "SCALE": ("EXTEND", "NOEXTEND"), 1007 "SHARD": ("EXTEND", "NOEXTEND"), 1008 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1009 **dict.fromkeys( 1010 ( 1011 "SESSION", 1012 "GLOBAL", 1013 "KEEP", 1014 "NOKEEP", 1015 "ORDER", 1016 "NOORDER", 1017 "NOCACHE", 1018 "CYCLE", 1019 "NOCYCLE", 1020 "NOMINVALUE", 1021 "NOMAXVALUE", 1022 "NOSCALE", 1023 "NOSHARD", 1024 ), 1025 tuple(), 1026 ), 1027 } 1028 1029 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1030 1031 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1032 1033 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1034 1035 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1036 1037 CLONE_KEYWORDS = {"CLONE", "COPY"} 1038 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1039 1040 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1041 1042 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1043 1044 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1045 1046 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1047 1048 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1049 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1050 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1051 1052 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1053 1054 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1055 1056 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 1057 1058 DISTINCT_TOKENS = {TokenType.DISTINCT} 1059 1060 NULL_TOKENS = {TokenType.NULL} 1061 1062 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1063 1064 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 
1065 1066 STRICT_CAST = True 1067 1068 PREFIXED_PIVOT_COLUMNS = False 1069 IDENTIFY_PIVOT_STRINGS = False 1070 1071 LOG_DEFAULTS_TO_LN = False 1072 1073 # Whether ADD is present for each column added by ALTER TABLE 1074 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1075 1076 # Whether the table sample clause expects CSV syntax 1077 TABLESAMPLE_CSV = False 1078 1079 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1080 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1081 1082 # Whether the TRIM function expects the characters to trim as its first argument 1083 TRIM_PATTERN_FIRST = False 1084 1085 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1086 STRING_ALIASES = False 1087 1088 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1089 MODIFIERS_ATTACHED_TO_UNION = True 1090 UNION_MODIFIERS = {"order", "limit", "offset"} 1091 1092 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1093 NO_PAREN_IF_COMMANDS = True 1094 1095 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1096 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1097 1098 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1099 # If this is True and '(' is not found, the keyword will be treated as an identifier 1100 VALUES_FOLLOWED_BY_PAREN = True 1101 1102 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1103 SUPPORTS_IMPLICIT_UNNEST = False 1104 1105 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1106 INTERVAL_SPANS = True 1107 1108 __slots__ = ( 1109 "error_level", 1110 "error_message_context", 1111 "max_errors", 1112 "dialect", 1113 "sql", 1114 "errors", 1115 "_tokens", 1116 "_index", 1117 "_curr", 1118 "_next", 1119 "_prev", 1120 "_prev_comments", 1121 ) 1122 1123 # Autofilled 1124 SHOW_TRIE: t.Dict = {} 1125 SET_TRIE: t.Dict = {} 1126 1127 def __init__( 1128 self, 1129 error_level: t.Optional[ErrorLevel] = None, 1130 error_message_context: int = 100, 1131 max_errors: int = 3, 1132 dialect: DialectType = None, 1133 ): 1134 from sqlglot.dialects import Dialect 1135 1136 self.error_level = error_level or ErrorLevel.IMMEDIATE 1137 self.error_message_context = error_message_context 1138 self.max_errors = max_errors 1139 self.dialect = Dialect.get_or_raise(dialect) 1140 self.reset() 1141 1142 def reset(self): 1143 self.sql = "" 1144 self.errors = [] 1145 self._tokens = [] 1146 self._index = 0 1147 self._curr = None 1148 self._next = None 1149 self._prev = None 1150 self._prev_comments = None 1151 1152 def parse( 1153 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1154 ) -> t.List[t.Optional[exp.Expression]]: 1155 """ 1156 Parses a list of tokens and returns a list of syntax trees, one tree 1157 per parsed SQL statement. 1158 1159 Args: 1160 raw_tokens: The list of tokens. 1161 sql: The original SQL string, used to produce helpful debug messages. 1162 1163 Returns: 1164 The list of the produced syntax trees. 1165 """ 1166 return self._parse( 1167 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1168 ) 1169 1170 def parse_into( 1171 self, 1172 expression_types: exp.IntoType, 1173 raw_tokens: t.List[Token], 1174 sql: t.Optional[str] = None, 1175 ) -> t.List[t.Optional[exp.Expression]]: 1176 """ 1177 Parses a list of tokens into a given Expression type. 
If a collection of Expression 1178 types is given instead, this method will try to parse the token list into each one 1179 of them, stopping at the first for which the parsing succeeds. 1180 1181 Args: 1182 expression_types: The expression type(s) to try and parse the token list into. 1183 raw_tokens: The list of tokens. 1184 sql: The original SQL string, used to produce helpful debug messages. 1185 1186 Returns: 1187 The target Expression. 1188 """ 1189 errors = [] 1190 for expression_type in ensure_list(expression_types): 1191 parser = self.EXPRESSION_PARSERS.get(expression_type) 1192 if not parser: 1193 raise TypeError(f"No parser registered for {expression_type}") 1194 1195 try: 1196 return self._parse(parser, raw_tokens, sql) 1197 except ParseError as e: 1198 e.errors[0]["into_expression"] = expression_type 1199 errors.append(e) 1200 1201 raise ParseError( 1202 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1203 errors=merge_errors(errors), 1204 ) from errors[-1] 1205 1206 def _parse( 1207 self, 1208 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1209 raw_tokens: t.List[Token], 1210 sql: t.Optional[str] = None, 1211 ) -> t.List[t.Optional[exp.Expression]]: 1212 self.reset() 1213 self.sql = sql or "" 1214 1215 total = len(raw_tokens) 1216 chunks: t.List[t.List[Token]] = [[]] 1217 1218 for i, token in enumerate(raw_tokens): 1219 if token.token_type == TokenType.SEMICOLON: 1220 if i < total - 1: 1221 chunks.append([]) 1222 else: 1223 chunks[-1].append(token) 1224 1225 expressions = [] 1226 1227 for tokens in chunks: 1228 self._index = -1 1229 self._tokens = tokens 1230 self._advance() 1231 1232 expressions.append(parse_method(self)) 1233 1234 if self._index < len(self._tokens): 1235 self.raise_error("Invalid expression / Unexpected token") 1236 1237 self.check_errors() 1238 1239 return expressions 1240 1241 def check_errors(self) -> None: 1242 """Logs or raises any found errors, depending on the chosen error level setting.""" 
1243 if self.error_level == ErrorLevel.WARN: 1244 for error in self.errors: 1245 logger.error(str(error)) 1246 elif self.error_level == ErrorLevel.RAISE and self.errors: 1247 raise ParseError( 1248 concat_messages(self.errors, self.max_errors), 1249 errors=merge_errors(self.errors), 1250 ) 1251 1252 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1253 """ 1254 Appends an error in the list of recorded errors or raises it, depending on the chosen 1255 error level setting. 1256 """ 1257 token = token or self._curr or self._prev or Token.string("") 1258 start = token.start 1259 end = token.end + 1 1260 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1261 highlight = self.sql[start:end] 1262 end_context = self.sql[end : end + self.error_message_context] 1263 1264 error = ParseError.new( 1265 f"{message}. Line {token.line}, Col: {token.col}.\n" 1266 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1267 description=message, 1268 line=token.line, 1269 col=token.col, 1270 start_context=start_context, 1271 highlight=highlight, 1272 end_context=end_context, 1273 ) 1274 1275 if self.error_level == ErrorLevel.IMMEDIATE: 1276 raise error 1277 1278 self.errors.append(error) 1279 1280 def expression( 1281 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1282 ) -> E: 1283 """ 1284 Creates a new, validated Expression. 1285 1286 Args: 1287 exp_class: The expression class to instantiate. 1288 comments: An optional list of comments to attach to the expression. 1289 kwargs: The arguments to set for the expression along with their respective values. 1290 1291 Returns: 1292 The target expression. 
1293 """ 1294 instance = exp_class(**kwargs) 1295 instance.add_comments(comments) if comments else self._add_comments(instance) 1296 return self.validate_expression(instance) 1297 1298 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1299 if expression and self._prev_comments: 1300 expression.add_comments(self._prev_comments) 1301 self._prev_comments = None 1302 1303 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1304 """ 1305 Validates an Expression, making sure that all its mandatory arguments are set. 1306 1307 Args: 1308 expression: The expression to validate. 1309 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1310 1311 Returns: 1312 The validated expression. 1313 """ 1314 if self.error_level != ErrorLevel.IGNORE: 1315 for error_message in expression.error_messages(args): 1316 self.raise_error(error_message) 1317 1318 return expression 1319 1320 def _find_sql(self, start: Token, end: Token) -> str: 1321 return self.sql[start.start : end.end + 1] 1322 1323 def _is_connected(self) -> bool: 1324 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1325 1326 def _advance(self, times: int = 1) -> None: 1327 self._index += times 1328 self._curr = seq_get(self._tokens, self._index) 1329 self._next = seq_get(self._tokens, self._index + 1) 1330 1331 if self._index > 0: 1332 self._prev = self._tokens[self._index - 1] 1333 self._prev_comments = self._prev.comments 1334 else: 1335 self._prev = None 1336 self._prev_comments = None 1337 1338 def _retreat(self, index: int) -> None: 1339 if index != self._index: 1340 self._advance(index - self._index) 1341 1342 def _warn_unsupported(self) -> None: 1343 if len(self._tokens) <= 1: 1344 return 1345 1346 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1347 # interested in emitting a warning for the one being currently processed. 
1348 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1349 1350 logger.warning( 1351 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1352 ) 1353 1354 def _parse_command(self) -> exp.Command: 1355 self._warn_unsupported() 1356 return self.expression( 1357 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1358 ) 1359 1360 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1361 """ 1362 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. This behavior can 1363 be different depending on the uset-set ErrorLevel, so _try_parse aims to solve this by setting & resetting 1364 the parser state accordingly 1365 """ 1366 index = self._index 1367 error_level = self.error_level 1368 1369 self.error_level = ErrorLevel.IMMEDIATE 1370 try: 1371 this = parse_method() 1372 except ParseError: 1373 this = None 1374 finally: 1375 if not this or retreat: 1376 self._retreat(index) 1377 self.error_level = error_level 1378 1379 return this 1380 1381 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1382 start = self._prev 1383 exists = self._parse_exists() if allow_exists else None 1384 1385 self._match(TokenType.ON) 1386 1387 kind = self._match_set(self.CREATABLES) and self._prev 1388 if not kind: 1389 return self._parse_as_command(start) 1390 1391 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1392 this = self._parse_user_defined_function(kind=kind.token_type) 1393 elif kind.token_type == TokenType.TABLE: 1394 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1395 elif kind.token_type == TokenType.COLUMN: 1396 this = self._parse_column() 1397 else: 1398 this = self._parse_id_var() 1399 1400 self._match(TokenType.IS) 1401 1402 return self.expression( 1403 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1404 ) 1405 
    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        """Parses a (possibly schema-qualified) table name into a ToTableProperty node."""
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause into an exp.MergeTreeTTL node."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # A TTL entry is an expression optionally followed by an action keyword
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """
        Parses a single statement: dispatches to a registered statement parser, falls back to
        command parsing for known command tokens, and otherwise parses an expression/SELECT.
        """
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parses a DROP statement; falls back to a generic Command for unknown kinds."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; truthy only when the full sequence was consumed
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parses a CREATE [OR REPLACE] statement; falls back to a Command when unparseable."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at different clause positions into one node
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr:
            # Leftover tokens mean we couldn't fully parse this CREATE
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        """Parses CREATE SEQUENCE options; returns None if nothing was consumed."""
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The matched parser does not accept the modifiers collected above
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        """Parses a parenthesized, comma-separated list of properties."""
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parses a single table/view property, trying registered parsers first."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            # Not a key=value property; backtrack and try sequence options instead
            self._retreat(index)
            return self._parse_sequence_properties()

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_bitwise() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parses STORED AS [INPUTFORMAT ... OUTPUTFORMAT ...] <format>."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        # Optional "=" or "AS" between the property name and its value
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Collects consecutive properties into an exp.Properties node, or returns None."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # VOLATILE right after CREATE/REPLACE/UNIQUE is a table property; otherwise it's
        # a function stability marker
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        """Parses SYSTEM_VERSIONING = ON [(HISTORY_TABLE = ..., DATA_CONSISTENCY_CHECK = ...)]."""
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parses the various forms that can follow a WITH keyword in DDL."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parses CHECKSUM [=] ON | OFF | DEFAULT."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        # `wrapped` controls whether the expressions are parenthesized
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parses CLUSTERED BY (cols) [SORTED BY (cols)] INTO n BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            # Not COPY GRANTS: give back the COPY token
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parses BLOCKCOMPRESSION = ALWAYS | MANUAL | NEVER | DEFAULT [AUTOTEMP (...)]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            # Not an isolated-loading clause after all; backtrack fully
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parses a Teradata-style LOCKING clause (kind, object, FOR/IN, lock type, OVERRIDE)."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parses an optional PARTITION BY clause into a list of expressions."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parses a PostgreSQL partition bound: IN (...), FROM ... TO ..., or WITH (MODULUS, REMAINDER)."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    #
https://www.postgresql.org/docs/current/sql-createtable.html 2070 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2071 if not self._match_text_seq("OF"): 2072 self._retreat(self._index - 1) 2073 return None 2074 2075 this = self._parse_table(schema=True) 2076 2077 if self._match(TokenType.DEFAULT): 2078 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2079 elif self._match_text_seq("FOR", "VALUES"): 2080 expression = self._parse_partition_bound_spec() 2081 else: 2082 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2083 2084 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2085 2086 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2087 self._match(TokenType.EQ) 2088 return self.expression( 2089 exp.PartitionedByProperty, 2090 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2091 ) 2092 2093 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2094 if self._match_text_seq("AND", "STATISTICS"): 2095 statistics = True 2096 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2097 statistics = False 2098 else: 2099 statistics = None 2100 2101 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2102 2103 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2104 if self._match_text_seq("SQL"): 2105 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2106 return None 2107 2108 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2109 if self._match_text_seq("SQL", "DATA"): 2110 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2111 return None 2112 2113 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2114 if self._match_text_seq("PRIMARY", "INDEX"): 2115 return exp.NoPrimaryIndexProperty() 2116 if self._match_text_seq("SQL"): 2117 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2118 return 
None 2119 2120 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2121 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2122 return exp.OnCommitProperty() 2123 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2124 return exp.OnCommitProperty(delete=True) 2125 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2126 2127 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2128 if self._match_text_seq("SQL", "DATA"): 2129 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2130 return None 2131 2132 def _parse_distkey(self) -> exp.DistKeyProperty: 2133 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2134 2135 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2136 table = self._parse_table(schema=True) 2137 2138 options = [] 2139 while self._match_texts(("INCLUDING", "EXCLUDING")): 2140 this = self._prev.text.upper() 2141 2142 id_var = self._parse_id_var() 2143 if not id_var: 2144 return None 2145 2146 options.append( 2147 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2148 ) 2149 2150 return self.expression(exp.LikeProperty, this=table, expressions=options) 2151 2152 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2153 return self.expression( 2154 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2155 ) 2156 2157 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2158 self._match(TokenType.EQ) 2159 return self.expression( 2160 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2161 ) 2162 2163 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2164 self._match_text_seq("WITH", "CONNECTION") 2165 return self.expression( 2166 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2167 ) 2168 2169 def _parse_returns(self) -> exp.ReturnsProperty: 
        """Parse a RETURNS clause: a scalar type, TABLE<...>, or TABLE (<schema>)."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # TABLE<col type, ...> struct-style form
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        """Parse DESCRIBE [<kind>] [EXTENDED|FORMATTED] <table> [<properties>]."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(("EXTENDED", "FORMATTED")) and self._prev.text.upper()
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, style=style, kind=kind, expressions=expressions
        )

    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement (INSERT was already consumed).

        Handles INSERT ... DIRECTORY, INSERT OR <alternative>, INTO TABLE/FUNCTION
        targets, partitioning, ON CONFLICT/DUPLICATE KEY, and RETURNING.
        """
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            # Keep comments attached to INTO as well
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = self._parse_table(schema=True) if not is_function else self._parse_function()

        # RETURNING may appear before or after the source expression; try both
        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse KILL [CONNECTION | QUERY] <id>."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT ... or ON DUPLICATE KEY ...; None if neither is present."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            # DO UPDATE SET a = b, ...
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse RETURNING <exprs> [INTO <target>]; None if RETURNING is absent."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse the FORMAT part of ROW FORMAT (ROW was consumed by the caller)."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ROW FORMAT SERDE '<class>' [WITH SERDEPROPERTIES (...)] or
        ROW FORMAT DELIMITED with its optional terminator clauses.

        When `match_row` is True, the leading ROW FORMAT pair is required.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_properties()
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse LOAD DATA [LOCAL] INPATH ... INTO TABLE ... (Hive-style), or
        fall back to treating the statement as an opaque Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement (DELETE was already consumed)."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement (UPDATE was already consumed)."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table>."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS ('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse PARTITION (<exprs>); None if PARTITION is absent."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse a single VALUES row, parenthesized or bare, into a Tuple."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
2447 return self.expression(exp.Tuple, expressions=[self._parse_expression()]) 2448 2449 def _parse_projections(self) -> t.List[exp.Expression]: 2450 return self._parse_expressions() 2451 2452 def _parse_select( 2453 self, 2454 nested: bool = False, 2455 table: bool = False, 2456 parse_subquery_alias: bool = True, 2457 parse_set_operation: bool = True, 2458 ) -> t.Optional[exp.Expression]: 2459 cte = self._parse_with() 2460 2461 if cte: 2462 this = self._parse_statement() 2463 2464 if not this: 2465 self.raise_error("Failed to parse any statement following CTE") 2466 return cte 2467 2468 if "with" in this.arg_types: 2469 this.set("with", cte) 2470 else: 2471 self.raise_error(f"{this.key} does not support CTE") 2472 this = cte 2473 2474 return this 2475 2476 # duckdb supports leading with FROM x 2477 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2478 2479 if self._match(TokenType.SELECT): 2480 comments = self._prev_comments 2481 2482 hint = self._parse_hint() 2483 all_ = self._match(TokenType.ALL) 2484 distinct = self._match_set(self.DISTINCT_TOKENS) 2485 2486 kind = ( 2487 self._match(TokenType.ALIAS) 2488 and self._match_texts(("STRUCT", "VALUE")) 2489 and self._prev.text.upper() 2490 ) 2491 2492 if distinct: 2493 distinct = self.expression( 2494 exp.Distinct, 2495 on=self._parse_value() if self._match(TokenType.ON) else None, 2496 ) 2497 2498 if all_ and distinct: 2499 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2500 2501 limit = self._parse_limit(top=True) 2502 projections = self._parse_projections() 2503 2504 this = self.expression( 2505 exp.Select, 2506 kind=kind, 2507 hint=hint, 2508 distinct=distinct, 2509 expressions=projections, 2510 limit=limit, 2511 ) 2512 this.comments = comments 2513 2514 into = self._parse_into() 2515 if into: 2516 this.set("into", into) 2517 2518 if not from_: 2519 from_ = self._parse_from() 2520 2521 if from_: 2522 this.set("from", from_) 2523 2524 this = 
self._parse_query_modifiers(this) 2525 elif (table or nested) and self._match(TokenType.L_PAREN): 2526 if self._match(TokenType.PIVOT): 2527 this = self._parse_simplified_pivot() 2528 elif self._match(TokenType.FROM): 2529 this = exp.select("*").from_( 2530 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2531 ) 2532 else: 2533 this = ( 2534 self._parse_table() 2535 if table 2536 else self._parse_select(nested=True, parse_set_operation=False) 2537 ) 2538 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2539 2540 self._match_r_paren() 2541 2542 # We return early here so that the UNION isn't attached to the subquery by the 2543 # following call to _parse_set_operations, but instead becomes the parent node 2544 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2545 elif self._match(TokenType.VALUES, advance=False): 2546 this = self._parse_derived_table_values() 2547 elif from_: 2548 this = exp.select("*").from_(from_.this, copy=False) 2549 else: 2550 this = None 2551 2552 if parse_set_operation: 2553 return self._parse_set_operations(this) 2554 return this 2555 2556 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2557 if not skip_with_token and not self._match(TokenType.WITH): 2558 return None 2559 2560 comments = self._prev_comments 2561 recursive = self._match(TokenType.RECURSIVE) 2562 2563 expressions = [] 2564 while True: 2565 expressions.append(self._parse_cte()) 2566 2567 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2568 break 2569 else: 2570 self._match(TokenType.WITH) 2571 2572 return self.expression( 2573 exp.With, comments=comments, expressions=expressions, recursive=recursive 2574 ) 2575 2576 def _parse_cte(self) -> exp.CTE: 2577 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2578 if not alias or not alias.this: 2579 self.raise_error("Expected CTE to have alias") 2580 2581 self._match(TokenType.ALIAS) 2582 2583 if self._match_text_seq("NOT", 
"MATERIALIZED"): 2584 materialized = False 2585 elif self._match_text_seq("MATERIALIZED"): 2586 materialized = True 2587 else: 2588 materialized = None 2589 2590 return self.expression( 2591 exp.CTE, 2592 this=self._parse_wrapped(self._parse_statement), 2593 alias=alias, 2594 materialized=materialized, 2595 ) 2596 2597 def _parse_table_alias( 2598 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2599 ) -> t.Optional[exp.TableAlias]: 2600 any_token = self._match(TokenType.ALIAS) 2601 alias = ( 2602 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2603 or self._parse_string_as_identifier() 2604 ) 2605 2606 index = self._index 2607 if self._match(TokenType.L_PAREN): 2608 columns = self._parse_csv(self._parse_function_parameter) 2609 self._match_r_paren() if columns else self._retreat(index) 2610 else: 2611 columns = None 2612 2613 if not alias and not columns: 2614 return None 2615 2616 return self.expression(exp.TableAlias, this=alias, columns=columns) 2617 2618 def _parse_subquery( 2619 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2620 ) -> t.Optional[exp.Subquery]: 2621 if not this: 2622 return None 2623 2624 return self.expression( 2625 exp.Subquery, 2626 this=this, 2627 pivots=self._parse_pivots(), 2628 alias=self._parse_table_alias() if parse_alias else None, 2629 ) 2630 2631 def _implicit_unnests_to_explicit(self, this: E) -> E: 2632 from sqlglot.optimizer.normalize_identifiers import ( 2633 normalize_identifiers as _norm, 2634 ) 2635 2636 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2637 for i, join in enumerate(this.args.get("joins") or []): 2638 table = join.this 2639 normalized_table = table.copy() 2640 normalized_table.meta["maybe_column"] = True 2641 normalized_table = _norm(normalized_table, dialect=self.dialect) 2642 2643 if isinstance(table, exp.Table) and not join.args.get("on"): 2644 if normalized_table.parts[0].name in refs: 2645 
table_as_column = table.to_column() 2646 unnest = exp.Unnest(expressions=[table_as_column]) 2647 2648 # Table.to_column creates a parent Alias node that we want to convert to 2649 # a TableAlias and attach to the Unnest, so it matches the parser's output 2650 if isinstance(table.args.get("alias"), exp.TableAlias): 2651 table_as_column.replace(table_as_column.this) 2652 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2653 2654 table.replace(unnest) 2655 2656 refs.add(normalized_table.alias_or_name) 2657 2658 return this 2659 2660 def _parse_query_modifiers( 2661 self, this: t.Optional[exp.Expression] 2662 ) -> t.Optional[exp.Expression]: 2663 if isinstance(this, (exp.Query, exp.Table)): 2664 for join in self._parse_joins(): 2665 this.append("joins", join) 2666 for lateral in iter(self._parse_lateral, None): 2667 this.append("laterals", lateral) 2668 2669 while True: 2670 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2671 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2672 key, expression = parser(self) 2673 2674 if expression: 2675 this.set(key, expression) 2676 if key == "limit": 2677 offset = expression.args.pop("offset", None) 2678 2679 if offset: 2680 offset = exp.Offset(expression=offset) 2681 this.set("offset", offset) 2682 2683 limit_by_expressions = expression.expressions 2684 expression.set("expressions", None) 2685 offset.set("expressions", limit_by_expressions) 2686 continue 2687 break 2688 2689 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2690 this = self._implicit_unnests_to_explicit(this) 2691 2692 return this 2693 2694 def _parse_hint(self) -> t.Optional[exp.Hint]: 2695 if self._match(TokenType.HINT): 2696 hints = [] 2697 for hint in iter( 2698 lambda: self._parse_csv( 2699 lambda: self._parse_function() or self._parse_var(upper=True) 2700 ), 2701 [], 2702 ): 2703 hints.extend(hint) 2704 2705 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2706 
self.raise_error("Expected */ after HINT") 2707 2708 return self.expression(exp.Hint, expressions=hints) 2709 2710 return None 2711 2712 def _parse_into(self) -> t.Optional[exp.Into]: 2713 if not self._match(TokenType.INTO): 2714 return None 2715 2716 temp = self._match(TokenType.TEMPORARY) 2717 unlogged = self._match_text_seq("UNLOGGED") 2718 self._match(TokenType.TABLE) 2719 2720 return self.expression( 2721 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2722 ) 2723 2724 def _parse_from( 2725 self, joins: bool = False, skip_from_token: bool = False 2726 ) -> t.Optional[exp.From]: 2727 if not skip_from_token and not self._match(TokenType.FROM): 2728 return None 2729 2730 return self.expression( 2731 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2732 ) 2733 2734 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2735 if not self._match(TokenType.MATCH_RECOGNIZE): 2736 return None 2737 2738 self._match_l_paren() 2739 2740 partition = self._parse_partition_by() 2741 order = self._parse_order() 2742 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2743 2744 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2745 rows = exp.var("ONE ROW PER MATCH") 2746 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2747 text = "ALL ROWS PER MATCH" 2748 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2749 text += " SHOW EMPTY MATCHES" 2750 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2751 text += " OMIT EMPTY MATCHES" 2752 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2753 text += " WITH UNMATCHED ROWS" 2754 rows = exp.var(text) 2755 else: 2756 rows = None 2757 2758 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2759 text = "AFTER MATCH SKIP" 2760 if self._match_text_seq("PAST", "LAST", "ROW"): 2761 text += " PAST LAST ROW" 2762 elif self._match_text_seq("TO", "NEXT", "ROW"): 2763 text += " TO NEXT ROW" 2764 elif 
self._match_text_seq("TO", "FIRST"): 2765 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2766 elif self._match_text_seq("TO", "LAST"): 2767 text += f" TO LAST {self._advance_any().text}" # type: ignore 2768 after = exp.var(text) 2769 else: 2770 after = None 2771 2772 if self._match_text_seq("PATTERN"): 2773 self._match_l_paren() 2774 2775 if not self._curr: 2776 self.raise_error("Expecting )", self._curr) 2777 2778 paren = 1 2779 start = self._curr 2780 2781 while self._curr and paren > 0: 2782 if self._curr.token_type == TokenType.L_PAREN: 2783 paren += 1 2784 if self._curr.token_type == TokenType.R_PAREN: 2785 paren -= 1 2786 2787 end = self._prev 2788 self._advance() 2789 2790 if paren > 0: 2791 self.raise_error("Expecting )", self._curr) 2792 2793 pattern = exp.var(self._find_sql(start, end)) 2794 else: 2795 pattern = None 2796 2797 define = ( 2798 self._parse_csv(self._parse_name_as_expression) 2799 if self._match_text_seq("DEFINE") 2800 else None 2801 ) 2802 2803 self._match_r_paren() 2804 2805 return self.expression( 2806 exp.MatchRecognize, 2807 partition_by=partition, 2808 order=order, 2809 measures=measures, 2810 rows=rows, 2811 after=after, 2812 pattern=pattern, 2813 define=define, 2814 alias=self._parse_table_alias(), 2815 ) 2816 2817 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2818 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2819 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2820 cross_apply = False 2821 2822 if cross_apply is not None: 2823 this = self._parse_select(table=True) 2824 view = None 2825 outer = None 2826 elif self._match(TokenType.LATERAL): 2827 this = self._parse_select(table=True) 2828 view = self._match(TokenType.VIEW) 2829 outer = self._match(TokenType.OUTER) 2830 else: 2831 return None 2832 2833 if not this: 2834 this = ( 2835 self._parse_unnest() 2836 or self._parse_function() 2837 or self._parse_id_var(any_token=False) 2838 ) 2839 2840 while 
self._match(TokenType.DOT): 2841 this = exp.Dot( 2842 this=this, 2843 expression=self._parse_function() or self._parse_id_var(any_token=False), 2844 ) 2845 2846 if view: 2847 table = self._parse_id_var(any_token=False) 2848 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2849 table_alias: t.Optional[exp.TableAlias] = self.expression( 2850 exp.TableAlias, this=table, columns=columns 2851 ) 2852 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2853 # We move the alias from the lateral's child node to the lateral itself 2854 table_alias = this.args["alias"].pop() 2855 else: 2856 table_alias = self._parse_table_alias() 2857 2858 return self.expression( 2859 exp.Lateral, 2860 this=this, 2861 view=view, 2862 outer=outer, 2863 alias=table_alias, 2864 cross_apply=cross_apply, 2865 ) 2866 2867 def _parse_join_parts( 2868 self, 2869 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2870 return ( 2871 self._match_set(self.JOIN_METHODS) and self._prev, 2872 self._match_set(self.JOIN_SIDES) and self._prev, 2873 self._match_set(self.JOIN_KINDS) and self._prev, 2874 ) 2875 2876 def _parse_join( 2877 self, skip_join_token: bool = False, parse_bracket: bool = False 2878 ) -> t.Optional[exp.Join]: 2879 if self._match(TokenType.COMMA): 2880 return self.expression(exp.Join, this=self._parse_table()) 2881 2882 index = self._index 2883 method, side, kind = self._parse_join_parts() 2884 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2885 join = self._match(TokenType.JOIN) 2886 2887 if not skip_join_token and not join: 2888 self._retreat(index) 2889 kind = None 2890 method = None 2891 side = None 2892 2893 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2894 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2895 2896 if not skip_join_token and not join and not outer_apply and not cross_apply: 2897 return None 2898 2899 kwargs: t.Dict[str, t.Any] 
= {"this": self._parse_table(parse_bracket=parse_bracket)} 2900 2901 if method: 2902 kwargs["method"] = method.text 2903 if side: 2904 kwargs["side"] = side.text 2905 if kind: 2906 kwargs["kind"] = kind.text 2907 if hint: 2908 kwargs["hint"] = hint 2909 2910 if self._match(TokenType.MATCH_CONDITION): 2911 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 2912 2913 if self._match(TokenType.ON): 2914 kwargs["on"] = self._parse_conjunction() 2915 elif self._match(TokenType.USING): 2916 kwargs["using"] = self._parse_wrapped_id_vars() 2917 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 2918 kind and kind.token_type == TokenType.CROSS 2919 ): 2920 index = self._index 2921 joins: t.Optional[list] = list(self._parse_joins()) 2922 2923 if joins and self._match(TokenType.ON): 2924 kwargs["on"] = self._parse_conjunction() 2925 elif joins and self._match(TokenType.USING): 2926 kwargs["using"] = self._parse_wrapped_id_vars() 2927 else: 2928 joins = None 2929 self._retreat(index) 2930 2931 kwargs["this"].set("joins", joins if joins else None) 2932 2933 comments = [c for token in (method, side, kind) if token for c in token.comments] 2934 return self.expression(exp.Join, comments=comments, **kwargs) 2935 2936 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2937 this = self._parse_conjunction() 2938 2939 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2940 return this 2941 2942 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2943 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2944 2945 return this 2946 2947 def _parse_index_params(self) -> exp.IndexParameters: 2948 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2949 2950 if self._match(TokenType.L_PAREN, advance=False): 2951 columns = self._parse_wrapped_csv(self._parse_with_operator) 2952 else: 2953 columns = None 2954 2955 include = self._parse_wrapped_id_vars() if 
        self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
        )

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition.

        When `index` is given, the name was already parsed and only the
        ON <table> part plus parameters follow; otherwise the UNIQUE/PRIMARY/AMP
        prefix and the index name are parsed here.
        """
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) table hints or MySQL USE/IGNORE/FORCE index hints."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse a single dotted-name component of a table reference."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a (possibly dotted) table reference into an exp.Table with catalog/db parts."""
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            # Fold a trailing "*" into the table identifier (e.g. BigQuery wildcard tables).
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        if is_db_reference:
            # For statements that reference a database, shift parts up one level.
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db,
            catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, unnest, VALUES, subquery, or a plain table
        reference, together with its alias, hints, pivots, sample and joins.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Some dialects put TABLESAMPLE before the alias; parse it early in that case.
        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample wraps the table it samples from.
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse temporal-table clauses (FOR TIMESTAMP/VERSION AS OF, BETWEEN, CONTAINED IN, ALL)."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse UNNEST(...) with optional alias and WITH ORDINALITY / WITH OFFSET."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column
alias in unnest.") 3221 3222 alias.set("columns", [alias.this]) 3223 alias.set("this", None) 3224 3225 columns = alias.args.get("columns") or [] 3226 if offset and len(expressions) < len(columns): 3227 offset = columns.pop() 3228 3229 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3230 self._match(TokenType.ALIAS) 3231 offset = self._parse_id_var( 3232 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3233 ) or exp.to_identifier("offset") 3234 3235 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3236 3237 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3238 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3239 if not is_derived and not self._match_text_seq("VALUES"): 3240 return None 3241 3242 expressions = self._parse_csv(self._parse_value) 3243 alias = self._parse_table_alias() 3244 3245 if is_derived: 3246 self._match_r_paren() 3247 3248 return self.expression( 3249 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3250 ) 3251 3252 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3253 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3254 as_modifier and self._match_text_seq("USING", "SAMPLE") 3255 ): 3256 return None 3257 3258 bucket_numerator = None 3259 bucket_denominator = None 3260 bucket_field = None 3261 percent = None 3262 size = None 3263 seed = None 3264 3265 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3266 matched_l_paren = self._match(TokenType.L_PAREN) 3267 3268 if self.TABLESAMPLE_CSV: 3269 num = None 3270 expressions = self._parse_csv(self._parse_primary) 3271 else: 3272 expressions = None 3273 num = ( 3274 self._parse_factor() 3275 if self._match(TokenType.NUMBER, advance=False) 3276 else self._parse_primary() or self._parse_placeholder() 3277 ) 3278 3279 if self._match_text_seq("BUCKET"): 3280 bucket_numerator = self._parse_number() 3281 
self._match_text_seq("OUT", "OF") 3282 bucket_denominator = bucket_denominator = self._parse_number() 3283 self._match(TokenType.ON) 3284 bucket_field = self._parse_field() 3285 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3286 percent = num 3287 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3288 size = num 3289 else: 3290 percent = num 3291 3292 if matched_l_paren: 3293 self._match_r_paren() 3294 3295 if self._match(TokenType.L_PAREN): 3296 method = self._parse_var(upper=True) 3297 seed = self._match(TokenType.COMMA) and self._parse_number() 3298 self._match_r_paren() 3299 elif self._match_texts(("SEED", "REPEATABLE")): 3300 seed = self._parse_wrapped(self._parse_number) 3301 3302 return self.expression( 3303 exp.TableSample, 3304 expressions=expressions, 3305 method=method, 3306 bucket_numerator=bucket_numerator, 3307 bucket_denominator=bucket_denominator, 3308 bucket_field=bucket_field, 3309 percent=percent, 3310 size=size, 3311 seed=seed, 3312 ) 3313 3314 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3315 return list(iter(self._parse_pivot, None)) or None 3316 3317 def _parse_joins(self) -> t.Iterator[exp.Join]: 3318 return iter(self._parse_join, None) 3319 3320 # https://duckdb.org/docs/sql/statements/pivot 3321 def _parse_simplified_pivot(self) -> exp.Pivot: 3322 def _parse_on() -> t.Optional[exp.Expression]: 3323 this = self._parse_bitwise() 3324 return self._parse_in(this) if self._match(TokenType.IN) else this 3325 3326 this = self._parse_table() 3327 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3328 using = self._match(TokenType.USING) and self._parse_csv( 3329 lambda: self._parse_alias(self._parse_function()) 3330 ) 3331 group = self._parse_group() 3332 return self.expression( 3333 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3334 ) 3335 3336 def _parse_pivot_in(self) -> exp.In: 3337 def _parse_aliased_expression() -> 
        t.Optional[exp.Expression]:
            this = self._parse_conjunction()

            self._match(TokenType.ALIAS)
            alias = self._parse_field()
            if alias:
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        aliased_expressions = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=aliased_expressions)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a single PIVOT/UNPIVOT clause, including its alias and derived column names."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            # Not actually a pivot clause; rewind past the PIVOT/UNPIVOT keyword.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Synthesize the output column names from aggregation aliases x IN values.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Default naming: use each aggregation's alias (dialects may override)."""
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        """Parse a ClickHouse-style PREWHERE clause."""
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY, including ALL, GROUPING SETS, ROLLUP, CUBE and WITH TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ =
            self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # WITH ROLLUP has no column list; plain ROLLUP(...) does.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    # A dangling WITH belongs to a later clause — give it back.
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse GROUPING SETS (...) into a list of grouping-set expressions."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized column tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse Oracle hierarchical query clauses (START WITH / CONNECT BY [NOCYCLE])."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        # PRIOR is only a prefix operator inside CONNECT BY; register it temporarily.
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> exp.Alias:
        """Parse "<name> AS <expr>" (alias first), as used by e.g. INTERPOLATE."""
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse an INTERPOLATE (...) list (ClickHouse ORDER BY ... WITH FILL)."""
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY (or ORDER SIBLINGS BY), returning `this` unchanged if absent."""
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a generic sort clause (e.g. SORT BY / CLUSTER BY) into `exp_class`."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse one ordering term with ASC/DESC, NULLS FIRST/LAST and WITH FILL."""
        this = parse_method() if parse_method else self._parse_conjunction()
        if not this:
            return None

        asc = self._match(TokenType.ASC)
        # NOTE(review): "(asc and False)" is always falsy — desc is just the DESC match,
        # though ASC is still consumed above. Looks intentional; confirm before changing.
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS",
"LAST") 3578 3579 nulls_first = is_nulls_first or False 3580 explicitly_null_ordered = is_nulls_first or is_nulls_last 3581 3582 if ( 3583 not explicitly_null_ordered 3584 and ( 3585 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3586 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3587 ) 3588 and self.dialect.NULL_ORDERING != "nulls_are_last" 3589 ): 3590 nulls_first = True 3591 3592 if self._match_text_seq("WITH", "FILL"): 3593 with_fill = self.expression( 3594 exp.WithFill, 3595 **{ # type: ignore 3596 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3597 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3598 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3599 }, 3600 ) 3601 else: 3602 with_fill = None 3603 3604 return self.expression( 3605 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3606 ) 3607 3608 def _parse_limit( 3609 self, 3610 this: t.Optional[exp.Expression] = None, 3611 top: bool = False, 3612 skip_limit_token: bool = False, 3613 ) -> t.Optional[exp.Expression]: 3614 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3615 comments = self._prev_comments 3616 if top: 3617 limit_paren = self._match(TokenType.L_PAREN) 3618 expression = self._parse_term() if limit_paren else self._parse_number() 3619 3620 if limit_paren: 3621 self._match_r_paren() 3622 else: 3623 expression = self._parse_term() 3624 3625 if self._match(TokenType.COMMA): 3626 offset = expression 3627 expression = self._parse_term() 3628 else: 3629 offset = None 3630 3631 limit_exp = self.expression( 3632 exp.Limit, 3633 this=this, 3634 expression=expression, 3635 offset=offset, 3636 comments=comments, 3637 expressions=self._parse_limit_by(), 3638 ) 3639 3640 return limit_exp 3641 3642 if self._match(TokenType.FETCH): 3643 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3644 direction = self._prev.text.upper() if direction else "FIRST" 3645 3646 
count = self._parse_field(tokens=self.FETCH_TOKENS) 3647 percent = self._match(TokenType.PERCENT) 3648 3649 self._match_set((TokenType.ROW, TokenType.ROWS)) 3650 3651 only = self._match_text_seq("ONLY") 3652 with_ties = self._match_text_seq("WITH", "TIES") 3653 3654 if only and with_ties: 3655 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3656 3657 return self.expression( 3658 exp.Fetch, 3659 direction=direction, 3660 count=count, 3661 percent=percent, 3662 with_ties=with_ties, 3663 ) 3664 3665 return this 3666 3667 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3668 if not self._match(TokenType.OFFSET): 3669 return this 3670 3671 count = self._parse_term() 3672 self._match_set((TokenType.ROW, TokenType.ROWS)) 3673 3674 return self.expression( 3675 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3676 ) 3677 3678 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3679 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3680 3681 def _parse_locks(self) -> t.List[exp.Lock]: 3682 locks = [] 3683 while True: 3684 if self._match_text_seq("FOR", "UPDATE"): 3685 update = True 3686 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3687 "LOCK", "IN", "SHARE", "MODE" 3688 ): 3689 update = False 3690 else: 3691 break 3692 3693 expressions = None 3694 if self._match_text_seq("OF"): 3695 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3696 3697 wait: t.Optional[bool | exp.Expression] = None 3698 if self._match_text_seq("NOWAIT"): 3699 wait = True 3700 elif self._match_text_seq("WAIT"): 3701 wait = self._parse_primary() 3702 elif self._match_text_seq("SKIP", "LOCKED"): 3703 wait = False 3704 3705 locks.append( 3706 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3707 ) 3708 3709 return locks 3710 3711 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> 
t.Optional[exp.Expression]: 3712 while this and self._match_set(self.SET_OPERATIONS): 3713 token_type = self._prev.token_type 3714 3715 if token_type == TokenType.UNION: 3716 operation = exp.Union 3717 elif token_type == TokenType.EXCEPT: 3718 operation = exp.Except 3719 else: 3720 operation = exp.Intersect 3721 3722 comments = self._prev.comments 3723 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3724 by_name = self._match_text_seq("BY", "NAME") 3725 expression = self._parse_select(nested=True, parse_set_operation=False) 3726 3727 this = self.expression( 3728 operation, 3729 comments=comments, 3730 this=this, 3731 distinct=distinct, 3732 by_name=by_name, 3733 expression=expression, 3734 ) 3735 3736 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3737 expression = this.expression 3738 3739 if expression: 3740 for arg in self.UNION_MODIFIERS: 3741 expr = expression.args.get(arg) 3742 if expr: 3743 this.set(arg, expr.pop()) 3744 3745 return this 3746 3747 def _parse_expression(self) -> t.Optional[exp.Expression]: 3748 return self._parse_alias(self._parse_conjunction()) 3749 3750 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3751 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3752 3753 def _parse_equality(self) -> t.Optional[exp.Expression]: 3754 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3755 3756 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3757 return self._parse_tokens(self._parse_range, self.COMPARISON) 3758 3759 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3760 this = this or self._parse_bitwise() 3761 negate = self._match(TokenType.NOT) 3762 3763 if self._match_set(self.RANGE_PARSERS): 3764 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3765 if not expression: 3766 return this 3767 3768 this = expression 3769 elif self._match(TokenType.ISNULL): 3770 this = 
self.expression(exp.Is, this=this, expression=exp.Null()) 3771 3772 # Postgres supports ISNULL and NOTNULL for conditions. 3773 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3774 if self._match(TokenType.NOTNULL): 3775 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3776 this = self.expression(exp.Not, this=this) 3777 3778 if negate: 3779 this = self.expression(exp.Not, this=this) 3780 3781 if self._match(TokenType.IS): 3782 this = self._parse_is(this) 3783 3784 return this 3785 3786 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3787 index = self._index - 1 3788 negate = self._match(TokenType.NOT) 3789 3790 if self._match_text_seq("DISTINCT", "FROM"): 3791 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3792 return self.expression(klass, this=this, expression=self._parse_bitwise()) 3793 3794 expression = self._parse_null() or self._parse_boolean() 3795 if not expression: 3796 self._retreat(index) 3797 return None 3798 3799 this = self.expression(exp.Is, this=this, expression=expression) 3800 return self.expression(exp.Not, this=this) if negate else this 3801 3802 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3803 unnest = self._parse_unnest(with_alias=False) 3804 if unnest: 3805 this = self.expression(exp.In, this=this, unnest=unnest) 3806 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3807 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3808 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3809 3810 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 3811 this = self.expression(exp.In, this=this, query=expressions[0]) 3812 else: 3813 this = self.expression(exp.In, this=this, expressions=expressions) 3814 3815 if matched_l_paren: 3816 self._match_r_paren(this) 3817 elif not self._match(TokenType.R_BRACKET, expression=this): 3818 self.raise_error("Expecting ]") 3819 
else: 3820 this = self.expression(exp.In, this=this, field=self._parse_field()) 3821 3822 return this 3823 3824 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3825 low = self._parse_bitwise() 3826 self._match(TokenType.AND) 3827 high = self._parse_bitwise() 3828 return self.expression(exp.Between, this=this, low=low, high=high) 3829 3830 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3831 if not self._match(TokenType.ESCAPE): 3832 return this 3833 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3834 3835 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3836 index = self._index 3837 3838 if not self._match(TokenType.INTERVAL) and match_interval: 3839 return None 3840 3841 if self._match(TokenType.STRING, advance=False): 3842 this = self._parse_primary() 3843 else: 3844 this = self._parse_term() 3845 3846 if not this or ( 3847 isinstance(this, exp.Column) 3848 and not this.table 3849 and not this.this.quoted 3850 and this.name.upper() == "IS" 3851 ): 3852 self._retreat(index) 3853 return None 3854 3855 unit = self._parse_function() or ( 3856 not self._match(TokenType.ALIAS, advance=False) 3857 and self._parse_var(any_token=True, upper=True) 3858 ) 3859 3860 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3861 # each INTERVAL expression into this canonical form so it's easy to transpile 3862 if this and this.is_number: 3863 this = exp.Literal.string(this.name) 3864 elif this and this.is_string: 3865 parts = this.name.split() 3866 3867 if len(parts) == 2: 3868 if unit: 3869 # This is not actually a unit, it's something else (e.g. 
a "window side") 3870 unit = None 3871 self._retreat(self._index - 1) 3872 3873 this = exp.Literal.string(parts[0]) 3874 unit = self.expression(exp.Var, this=parts[1].upper()) 3875 3876 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 3877 unit = self.expression( 3878 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 3879 ) 3880 3881 return self.expression(exp.Interval, this=this, unit=unit) 3882 3883 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3884 this = self._parse_term() 3885 3886 while True: 3887 if self._match_set(self.BITWISE): 3888 this = self.expression( 3889 self.BITWISE[self._prev.token_type], 3890 this=this, 3891 expression=self._parse_term(), 3892 ) 3893 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3894 this = self.expression( 3895 exp.DPipe, 3896 this=this, 3897 expression=self._parse_term(), 3898 safe=not self.dialect.STRICT_STRING_CONCAT, 3899 ) 3900 elif self._match(TokenType.DQMARK): 3901 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3902 elif self._match_pair(TokenType.LT, TokenType.LT): 3903 this = self.expression( 3904 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3905 ) 3906 elif self._match_pair(TokenType.GT, TokenType.GT): 3907 this = self.expression( 3908 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3909 ) 3910 else: 3911 break 3912 3913 return this 3914 3915 def _parse_term(self) -> t.Optional[exp.Expression]: 3916 return self._parse_tokens(self._parse_factor, self.TERM) 3917 3918 def _parse_factor(self) -> t.Optional[exp.Expression]: 3919 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3920 this = parse_method() 3921 3922 while self._match_set(self.FACTOR): 3923 this = self.expression( 3924 self.FACTOR[self._prev.token_type], 3925 this=this, 3926 comments=self._prev_comments, 3927 expression=parse_method(), 3928 ) 3929 if isinstance(this, exp.Div): 3930 
this.args["typed"] = self.dialect.TYPED_DIVISION 3931 this.args["safe"] = self.dialect.SAFE_DIVISION 3932 3933 return this 3934 3935 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3936 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3937 3938 def _parse_unary(self) -> t.Optional[exp.Expression]: 3939 if self._match_set(self.UNARY_PARSERS): 3940 return self.UNARY_PARSERS[self._prev.token_type](self) 3941 return self._parse_at_time_zone(self._parse_type()) 3942 3943 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3944 interval = parse_interval and self._parse_interval() 3945 if interval: 3946 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 3947 while True: 3948 index = self._index 3949 self._match(TokenType.PLUS) 3950 3951 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 3952 self._retreat(index) 3953 break 3954 3955 interval = self.expression( # type: ignore 3956 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 3957 ) 3958 3959 return interval 3960 3961 index = self._index 3962 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3963 this = self._parse_column() 3964 3965 if data_type: 3966 if isinstance(this, exp.Literal): 3967 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3968 if parser: 3969 return parser(self, this, data_type) 3970 return self.expression(exp.Cast, this=this, to=data_type) 3971 if not data_type.expressions: 3972 self._retreat(index) 3973 return self._parse_column() 3974 return self._parse_column_ops(data_type) 3975 3976 return this and self._parse_column_ops(this) 3977 3978 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3979 this = self._parse_type() 3980 if not this: 3981 return None 3982 3983 if isinstance(this, exp.Column) and not this.table: 3984 this = exp.var(this.name.upper()) 3985 3986 return self.expression( 3987 exp.DataTypeParam, this=this, 
expression=self._parse_var(any_token=True) 3988 ) 3989 3990 def _parse_types( 3991 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3992 ) -> t.Optional[exp.Expression]: 3993 index = self._index 3994 3995 prefix = self._match_text_seq("SYSUDTLIB", ".") 3996 3997 if not self._match_set(self.TYPE_TOKENS): 3998 identifier = allow_identifiers and self._parse_id_var( 3999 any_token=False, tokens=(TokenType.VAR,) 4000 ) 4001 if identifier: 4002 tokens = self.dialect.tokenize(identifier.name) 4003 4004 if len(tokens) != 1: 4005 self.raise_error("Unexpected identifier", self._prev) 4006 4007 if tokens[0].token_type in self.TYPE_TOKENS: 4008 self._prev = tokens[0] 4009 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4010 type_name = identifier.name 4011 4012 while self._match(TokenType.DOT): 4013 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4014 4015 return exp.DataType.build(type_name, udt=True) 4016 else: 4017 self._retreat(self._index - 1) 4018 return None 4019 else: 4020 return None 4021 4022 type_token = self._prev.token_type 4023 4024 if type_token == TokenType.PSEUDO_TYPE: 4025 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4026 4027 if type_token == TokenType.OBJECT_IDENTIFIER: 4028 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4029 4030 nested = type_token in self.NESTED_TYPE_TOKENS 4031 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4032 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4033 expressions = None 4034 maybe_func = False 4035 4036 if self._match(TokenType.L_PAREN): 4037 if is_struct: 4038 expressions = self._parse_csv(self._parse_struct_types) 4039 elif nested: 4040 expressions = self._parse_csv( 4041 lambda: self._parse_types( 4042 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4043 ) 4044 ) 4045 elif type_token in self.ENUM_TYPE_TOKENS: 4046 expressions = self._parse_csv(self._parse_equality) 4047 elif 
is_aggregate: 4048 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4049 any_token=False, tokens=(TokenType.VAR,) 4050 ) 4051 if not func_or_ident or not self._match(TokenType.COMMA): 4052 return None 4053 expressions = self._parse_csv( 4054 lambda: self._parse_types( 4055 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4056 ) 4057 ) 4058 expressions.insert(0, func_or_ident) 4059 else: 4060 expressions = self._parse_csv(self._parse_type_size) 4061 4062 if not expressions or not self._match(TokenType.R_PAREN): 4063 self._retreat(index) 4064 return None 4065 4066 maybe_func = True 4067 4068 this: t.Optional[exp.Expression] = None 4069 values: t.Optional[t.List[exp.Expression]] = None 4070 4071 if nested and self._match(TokenType.LT): 4072 if is_struct: 4073 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4074 else: 4075 expressions = self._parse_csv( 4076 lambda: self._parse_types( 4077 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4078 ) 4079 ) 4080 4081 if not self._match(TokenType.GT): 4082 self.raise_error("Expecting >") 4083 4084 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4085 values = self._parse_csv(self._parse_conjunction) 4086 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4087 4088 if type_token in self.TIMESTAMPS: 4089 if self._match_text_seq("WITH", "TIME", "ZONE"): 4090 maybe_func = False 4091 tz_type = ( 4092 exp.DataType.Type.TIMETZ 4093 if type_token in self.TIMES 4094 else exp.DataType.Type.TIMESTAMPTZ 4095 ) 4096 this = exp.DataType(this=tz_type, expressions=expressions) 4097 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4098 maybe_func = False 4099 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4100 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4101 maybe_func = False 4102 elif type_token == TokenType.INTERVAL: 4103 unit = 
self._parse_var(any_token=True, upper=True) 4104 4105 if self._match_text_seq("TO"): 4106 unit = exp.IntervalSpan( 4107 this=unit, expression=self._parse_var(any_token=True, upper=True) 4108 ) 4109 4110 if unit: 4111 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4112 else: 4113 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4114 4115 if maybe_func and check_func: 4116 index2 = self._index 4117 peek = self._parse_string() 4118 4119 if not peek: 4120 self._retreat(index) 4121 return None 4122 4123 self._retreat(index2) 4124 4125 if not this: 4126 if self._match_text_seq("UNSIGNED"): 4127 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4128 if not unsigned_type_token: 4129 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4130 4131 type_token = unsigned_type_token or type_token 4132 4133 this = exp.DataType( 4134 this=exp.DataType.Type[type_token.value], 4135 expressions=expressions, 4136 nested=nested, 4137 values=values, 4138 prefix=prefix, 4139 ) 4140 4141 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 4142 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 4143 4144 return this 4145 4146 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4147 index = self._index 4148 this = self._parse_type(parse_interval=False) or self._parse_id_var() 4149 self._match(TokenType.COLON) 4150 column_def = self._parse_column_def(this) 4151 4152 if type_required and ( 4153 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 4154 ): 4155 self._retreat(index) 4156 return self._parse_types() 4157 4158 return column_def 4159 4160 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4161 if not self._match_text_seq("AT", "TIME", "ZONE"): 4162 return this 4163 return self.expression(exp.AtTimeZone, this=this, 
zone=self._parse_unary()) 4164 4165 def _parse_column(self) -> t.Optional[exp.Expression]: 4166 this = self._parse_column_reference() 4167 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4168 4169 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4170 this = self._parse_field() 4171 if ( 4172 not this 4173 and self._match(TokenType.VALUES, advance=False) 4174 and self.VALUES_FOLLOWED_BY_PAREN 4175 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4176 ): 4177 this = self._parse_id_var() 4178 4179 return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this 4180 4181 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4182 this = self._parse_bracket(this) 4183 4184 while self._match_set(self.COLUMN_OPERATORS): 4185 op_token = self._prev.token_type 4186 op = self.COLUMN_OPERATORS.get(op_token) 4187 4188 if op_token == TokenType.DCOLON: 4189 field = self._parse_types() 4190 if not field: 4191 self.raise_error("Expected type") 4192 elif op and self._curr: 4193 field = self._parse_column_reference() 4194 else: 4195 field = self._parse_field(anonymous_func=True, any_token=True) 4196 4197 if isinstance(field, exp.Func) and this: 4198 # bigquery allows function calls like x.y.count(...) 4199 # SAFE.SUBSTR(...) 
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                # Fold the accumulated column path into a Dot chain so the function
                # name keeps its qualifiers (e.g. SAFE.SUBSTR)
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the column's qualifiers one level: a.b.c -> column c, table b, db a
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary: a literal, adjacent-string concat, ``.N`` number, or a
        parenthesized expression / subquery / tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate: 'a' 'b' -> CONCAT('a', 'b')
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # .5 -> 0.5
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a primary, a function call, or an identifier — in that order."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, including the ODBC ``{fn <function>}`` escape."""
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call proper.

        Args:
            functions: name -> builder overrides (defaults to ``self.FUNCTIONS``).
            anonymous: build an exp.Anonymous instead of a typed function node.
            optional_parens: allow parenless functions like CURRENT_DATE.
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Remember the original casing of the function name
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Normalize key-value style arguments (aliases, equalities) into exp.PropertyEQ."""
        transformed = []

        for e in expressions:
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    # The key is an identifier, not a column reference
                    e.this.replace(e.this.this)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse a single ``name type`` parameter in a UDF signature."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly-qualified UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'abc'); bare introducers are identifiers."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter, optionally qualified as ``kind.name``."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda ((x, y) -> ...), or fall back to a DISTINCT/select/expression
        argument with optional ordering/limit modifiers."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda — rewind and parse a regular argument
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized column/constraint list into an exp.Schema."""
        index = self._index

        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse a column definition whose name may be any token."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse ``<name> [type] [constraints...]`` into an exp.ColumnDef."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_text_seq("ALIAS"):
            # Computed column: <name> AS <expr>
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        """Parse AUTO REFRESH <value>; rewinds if REFRESH doesn't follow AUTO."""
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse COMPRESS, with either a wrapped value list or a single expression."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY (...) | ROW ... | <expr>}."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expr>)
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare (start, increment) shorthand
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse INLINE [LENGTH] <value>."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the constraint following NOT (NULL / CASESPECIFIC / FOR REPLICATION)."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via CONSTRAINT <name>."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table-level constraint, named or unnamed."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        """Collect consecutive unnamed constraints (or constraint-like function calls)."""
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one unnamed constraint whose keyword is in ``constraints``."""
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] [(columns)] [USING <index type>] [ON CONFLICT ...]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Parse trailing key-constraint options into their canonical string forms."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # ON <DELETE|UPDATE> <action>
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse REFERENCES <table> [options]; ``match=False`` skips the keyword check."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON DELETE/UPDATE <action>]..."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        """Parse one element of a PRIMARY KEY column list."""
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        """Parse PERIOD FOR SYSTEM_TIME (start, end); rewinds if not present."""
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse a PRIMARY KEY constraint, column-level or table-level (with columns)."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        """Parse one entry inside [...] or {...}: optionally aliased, optionally a slice."""
        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a bracket/brace suffix: subscript, array literal, or struct literal."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript: normalize indices for the dialect's base offset
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        # Chained subscripts, e.g. x[0][1]
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse ``lo:hi`` inside a subscript into an exp.Slice."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            # ELSE INTERVAL END: "END" was swallowed as the interval's unit, so the
            # ELSE branch is really the column "interval" and END closed the CASE
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF — either IF(cond, true[, false]) or IF ... THEN ... [ELSE ...] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # A statement-leading IF is a command (e.g. IF EXISTS ...), not an expression
                return self._parse_as_command(self._prev)

            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]; rewinds if absent."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(part FROM expr); also accepts comma-separated arguments."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the body of CAST(expr AS type [FORMAT fmt]).

        Args:
            strict: build exp.Cast (errors on failure) vs exp.TryCast.
            safe: marks dialect-level "safe" cast semantics on the node.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(expr, 'type string') variant
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                # A temporal cast with FORMAT becomes an explicit string-to-date/time
                # conversion, with the format translated to the target dialect
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT argument lists across dialect variants."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        # No WITHIN GROUP: treat as a plain GROUP_CONCAT-style call and restore the
        # cursor to just after the argument list (the R_PAREN probe above consumed tokens).
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse the argument list of a CONVERT call into a Cast/TryCast.

        Handles both `CONVERT(expr USING charset)` and `CONVERT(expr, type)`;
        when neither USING nor a comma follows, `to` is left as None.
        """
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        # `strict` selects CAST vs TRY_CAST semantics; `safe` is carried through untouched.
        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
5015 """ 5016 args = self._parse_csv(self._parse_conjunction) 5017 5018 if len(args) < 3: 5019 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5020 5021 expression, *expressions = args 5022 if not expression: 5023 return None 5024 5025 ifs = [] 5026 for search, result in zip(expressions[::2], expressions[1::2]): 5027 if not search or not result: 5028 return None 5029 5030 if isinstance(search, exp.Literal): 5031 ifs.append( 5032 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5033 ) 5034 elif isinstance(search, exp.Null): 5035 ifs.append( 5036 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5037 ) 5038 else: 5039 cond = exp.or_( 5040 exp.EQ(this=expression.copy(), expression=search), 5041 exp.and_( 5042 exp.Is(this=expression.copy(), expression=exp.Null()), 5043 exp.Is(this=search.copy(), expression=exp.Null()), 5044 copy=False, 5045 ), 5046 copy=False, 5047 ) 5048 ifs.append(exp.If(this=cond, true=result)) 5049 5050 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5051 5052 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5053 self._match_text_seq("KEY") 5054 key = self._parse_column() 5055 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5056 self._match_text_seq("VALUE") 5057 value = self._parse_bitwise() 5058 5059 if not key and not value: 5060 return None 5061 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5062 5063 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5064 if not this or not self._match_text_seq("FORMAT", "JSON"): 5065 return this 5066 5067 return self.expression(exp.FormatJson, this=this) 5068 5069 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5070 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 5071 for value in values: 5072 if self._match_text_seq(value, "ON", on): 5073 return f"{value} ON {on}" 5074 5075 return None 5076 5077 @t.overload 5078 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5079 5080 @t.overload 5081 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 5082 5083 def _parse_json_object(self, agg=False): 5084 star = self._parse_star() 5085 expressions = ( 5086 [star] 5087 if star 5088 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5089 ) 5090 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5091 5092 unique_keys = None 5093 if self._match_text_seq("WITH", "UNIQUE"): 5094 unique_keys = True 5095 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5096 unique_keys = False 5097 5098 self._match_text_seq("KEYS") 5099 5100 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5101 self._parse_type() 5102 ) 5103 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5104 5105 return self.expression( 5106 exp.JSONObjectAgg if agg else exp.JSONObject, 5107 expressions=expressions, 5108 null_handling=null_handling, 5109 unique_keys=unique_keys, 5110 return_type=return_type, 5111 encoding=encoding, 5112 ) 5113 5114 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5115 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5116 if not self._match_text_seq("NESTED"): 5117 this = self._parse_id_var() 5118 kind = self._parse_types(allow_identifiers=False) 5119 nested = None 5120 else: 5121 this = None 5122 kind = None 5123 nested = True 5124 5125 path = self._match_text_seq("PATH") and self._parse_string() 5126 nested_schema = nested and self._parse_json_schema() 5127 5128 return self.expression( 5129 exp.JSONColumnDef, 5130 this=this, 5131 kind=kind, 5132 path=path, 5133 nested_schema=nested_schema, 5134 ) 5135 5136 def _parse_json_schema(self) -> exp.JSONSchema: 
5137 self._match_text_seq("COLUMNS") 5138 return self.expression( 5139 exp.JSONSchema, 5140 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5141 ) 5142 5143 def _parse_json_table(self) -> exp.JSONTable: 5144 this = self._parse_format_json(self._parse_bitwise()) 5145 path = self._match(TokenType.COMMA) and self._parse_string() 5146 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5147 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5148 schema = self._parse_json_schema() 5149 5150 return exp.JSONTable( 5151 this=this, 5152 schema=schema, 5153 path=path, 5154 error_handling=error_handling, 5155 empty_handling=empty_handling, 5156 ) 5157 5158 def _parse_match_against(self) -> exp.MatchAgainst: 5159 expressions = self._parse_csv(self._parse_column) 5160 5161 self._match_text_seq(")", "AGAINST", "(") 5162 5163 this = self._parse_string() 5164 5165 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5166 modifier = "IN NATURAL LANGUAGE MODE" 5167 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5168 modifier = f"{modifier} WITH QUERY EXPANSION" 5169 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5170 modifier = "IN BOOLEAN MODE" 5171 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5172 modifier = "WITH QUERY EXPANSION" 5173 else: 5174 modifier = None 5175 5176 return self.expression( 5177 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5178 ) 5179 5180 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5181 def _parse_open_json(self) -> exp.OpenJSON: 5182 this = self._parse_bitwise() 5183 path = self._match(TokenType.COMMA) and self._parse_string() 5184 5185 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5186 this = self._parse_field(any_token=True) 5187 kind = self._parse_types() 5188 path = self._parse_string() 5189 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5190 5191 
            # Column spec for the WITH clause: name, type, optional JSON path, AS JSON flag.
            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/STRPOS-style arguments into an exp.StrPosition.

        Supports `POSITION(needle IN haystack)` as well as the comma form, whose
        argument order is controlled by `haystack_first` (dialect-dependent).
        """
        args = self._parse_csv(self._parse_bitwise)

        # ANSI form: POSITION(substr IN string) — the IN keyword separates the args.
        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        # Optional third argument is the start position.
        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse PREDICT(MODEL model_table, TABLE input_table [, params]) arguments."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            # Optional trailing STRUCT of parameters after a second comma.
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse the table list of a join hint (e.g. BROADCAST(t1, t2))."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        # FROM <start> and FOR <length> are appended positionally after the csv args.
        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            args.append(self._parse_bitwise())

        return
self.validate_expression(exp.Substring.from_arg_list(args), args) 5251 5252 def _parse_trim(self) -> exp.Trim: 5253 # https://www.w3resource.com/sql/character-functions/trim.php 5254 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5255 5256 position = None 5257 collation = None 5258 expression = None 5259 5260 if self._match_texts(self.TRIM_TYPES): 5261 position = self._prev.text.upper() 5262 5263 this = self._parse_bitwise() 5264 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5265 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5266 expression = self._parse_bitwise() 5267 5268 if invert_order: 5269 this, expression = expression, this 5270 5271 if self._match(TokenType.COLLATE): 5272 collation = self._parse_bitwise() 5273 5274 return self.expression( 5275 exp.Trim, this=this, position=position, expression=expression, collation=collation 5276 ) 5277 5278 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5279 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5280 5281 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5282 return self._parse_window(self._parse_id_var(), alias=True) 5283 5284 def _parse_respect_or_ignore_nulls( 5285 self, this: t.Optional[exp.Expression] 5286 ) -> t.Optional[exp.Expression]: 5287 if self._match_text_seq("IGNORE", "NULLS"): 5288 return self.expression(exp.IgnoreNulls, this=this) 5289 if self._match_text_seq("RESPECT", "NULLS"): 5290 return self.expression(exp.RespectNulls, this=this) 5291 return this 5292 5293 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5294 if self._match(TokenType.HAVING): 5295 self._match_texts(("MAX", "MIN")) 5296 max = self._prev.text.upper() != "MIN" 5297 return self.expression( 5298 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5299 ) 5300 5301 return this 5302 5303 def _parse_window( 5304 self, this: 
t.Optional[exp.Expression], alias: bool = False 5305 ) -> t.Optional[exp.Expression]: 5306 func = this 5307 comments = func.comments if isinstance(func, exp.Expression) else None 5308 5309 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5310 self._match(TokenType.WHERE) 5311 this = self.expression( 5312 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5313 ) 5314 self._match_r_paren() 5315 5316 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5317 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5318 if self._match_text_seq("WITHIN", "GROUP"): 5319 order = self._parse_wrapped(self._parse_order) 5320 this = self.expression(exp.WithinGroup, this=this, expression=order) 5321 5322 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5323 # Some dialects choose to implement and some do not. 5324 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5325 5326 # There is some code above in _parse_lambda that handles 5327 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5328 5329 # The below changes handle 5330 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5331 5332 # Oracle allows both formats 5333 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5334 # and Snowflake chose to do the same for familiarity 5335 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5336 if isinstance(this, exp.AggFunc): 5337 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5338 5339 if ignore_respect and ignore_respect is not this: 5340 ignore_respect.replace(ignore_respect.this) 5341 this = self.expression(ignore_respect.__class__, this=this) 5342 5343 this = self._parse_respect_or_ignore_nulls(this) 5344 5345 # bigquery select from window x AS (partition by ...) 
        if alias:
            # Named WINDOW clause entry (WINDOW x AS (...)): no OVER keyword expected.
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER (or dialect equivalent) follows — not a window expression.
            return this
        else:
            over = self._prev.text.upper()

        # Comments are re-attached to the Window node below, so strip them
        # from the wrapped function to avoid duplication.
        if comments:
            func.comments = None  # type: ignore

        if not self._match(TokenType.L_PAREN):
            # Bare `OVER window_name` reference to a named window (no parenthesized spec).
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        # FIRST/LAST (e.g. Oracle KEEP DENSE_RANK FIRST/LAST); LAST overrides FIRST.
        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame clause: ROWS/RANGE [BETWEEN] <start> [AND <end>].
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        # Another window keyword follows (without advancing): recurse so the
        # just-built window becomes the inner expression of the next one.
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY list and ORDER BY clause of a window spec."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame boundary (UNBOUNDED / CURRENT ROW / <expr>) plus its side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            # PRECEDING / FOLLOWING (dialect-defined in WINDOW_SIDES), if present.
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Attach an alias (or alias tuple) to `this` if one follows.

        When `explicit` is True, only an AS-introduced alias is accepted;
        otherwise a bare identifier (or, per dialect, a string) also aliases.
        """
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if explicit and not any_token:
            return this

        # Parenthesized list: expr AS (a, b, ...) -> exp.Aliases.
        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.comments
                column.comments = None

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier, falling back to any token / the given token set."""
        identifier = self._parse_identifier()

        if
identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            # NOTE(review): quoted is keyed off STRING token type here — i.e. a string
            # token consumed as an id-var is treated as a quoted identifier.
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal via the dialect's STRING_PARSERS, else a placeholder."""
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Consume a string token and wrap its text as a quoted identifier (or None)."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal via NUMERIC_PARSERS, else a placeholder."""
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a (delimited) IDENTIFIER token, else a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any token / one of `tokens`) into exp.Var.

        `upper` normalizes the variable text to uppercase.
        """
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        """Advance past the current token unless it's reserved; return the consumed token."""
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a variable, falling back to a string literal."""
        return self._parse_var() or self._parse_string()

    def
_parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5523 return self._parse_primary() or self._parse_var(any_token=True) 5524 5525 def _parse_null(self) -> t.Optional[exp.Expression]: 5526 if self._match_set(self.NULL_TOKENS): 5527 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5528 return self._parse_placeholder() 5529 5530 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5531 if self._match(TokenType.TRUE): 5532 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5533 if self._match(TokenType.FALSE): 5534 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5535 return self._parse_placeholder() 5536 5537 def _parse_star(self) -> t.Optional[exp.Expression]: 5538 if self._match(TokenType.STAR): 5539 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5540 return self._parse_placeholder() 5541 5542 def _parse_parameter(self) -> exp.Parameter: 5543 self._match(TokenType.L_BRACE) 5544 this = self._parse_identifier() or self._parse_primary_or_var() 5545 expression = self._match(TokenType.COLON) and ( 5546 self._parse_identifier() or self._parse_primary_or_var() 5547 ) 5548 self._match(TokenType.R_BRACE) 5549 return self.expression(exp.Parameter, this=this, expression=expression) 5550 5551 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5552 if self._match_set(self.PLACEHOLDER_PARSERS): 5553 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5554 if placeholder: 5555 return placeholder 5556 self._advance(-1) 5557 return None 5558 5559 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5560 if not self._match(TokenType.EXCEPT): 5561 return None 5562 if self._match(TokenType.L_PAREN, advance=False): 5563 return self._parse_wrapped_csv(self._parse_column) 5564 5565 except_column = self._parse_column() 5566 return [except_column] if except_column else None 5567 5568 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5569 if not self._match(TokenType.REPLACE): 
            return None
        # Peek (advance=False) for a parenthesized list: REPLACE (expr, ...).
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list using `parse_method`, dropping None results.

        Comments preceding each separator are attached to the item before it.
        """
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a binary-operator chain: token type -> expression class map."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated list of identifiers."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a parenthesized csv list; parens may be absent when `optional`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside (optional) parentheses, erroring if they're required."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        # Only consume the closing paren if an opening one was actually matched.
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated list of expressions."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool =
False) -> t.Optional[exp.Expression]: 5629 return self._parse_select() or self._parse_set_operations( 5630 self._parse_expression() if alias else self._parse_conjunction() 5631 ) 5632 5633 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5634 return self._parse_query_modifiers( 5635 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5636 ) 5637 5638 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5639 this = None 5640 if self._match_texts(self.TRANSACTION_KIND): 5641 this = self._prev.text 5642 5643 self._match_texts(("TRANSACTION", "WORK")) 5644 5645 modes = [] 5646 while True: 5647 mode = [] 5648 while self._match(TokenType.VAR): 5649 mode.append(self._prev.text) 5650 5651 if mode: 5652 modes.append(" ".join(mode)) 5653 if not self._match(TokenType.COMMA): 5654 break 5655 5656 return self.expression(exp.Transaction, this=this, modes=modes) 5657 5658 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5659 chain = None 5660 savepoint = None 5661 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5662 5663 self._match_texts(("TRANSACTION", "WORK")) 5664 5665 if self._match_text_seq("TO"): 5666 self._match_text_seq("SAVEPOINT") 5667 savepoint = self._parse_id_var() 5668 5669 if self._match(TokenType.AND): 5670 chain = not self._match_text_seq("NO") 5671 self._match_text_seq("CHAIN") 5672 5673 if is_rollback: 5674 return self.expression(exp.Rollback, savepoint=savepoint) 5675 5676 return self.expression(exp.Commit, chain=chain) 5677 5678 def _parse_refresh(self) -> exp.Refresh: 5679 self._match(TokenType.TABLE) 5680 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5681 5682 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5683 if not self._match_text_seq("ADD"): 5684 return None 5685 5686 self._match(TokenType.COLUMN) 5687 exists_column = self._parse_exists(not_=True) 5688 expression = self._parse_field_def() 5689 5690 if expression: 5691 
expression.set("exists", exists_column) 5692 5693 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5694 if self._match_texts(("FIRST", "AFTER")): 5695 position = self._prev.text 5696 column_position = self.expression( 5697 exp.ColumnPosition, this=self._parse_column(), position=position 5698 ) 5699 expression.set("position", column_position) 5700 5701 return expression 5702 5703 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5704 drop = self._match(TokenType.DROP) and self._parse_drop() 5705 if drop and not isinstance(drop, exp.Command): 5706 drop.set("kind", drop.args.get("kind", "COLUMN")) 5707 return drop 5708 5709 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5710 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5711 return self.expression( 5712 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5713 ) 5714 5715 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5716 index = self._index - 1 5717 5718 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5719 return self._parse_csv( 5720 lambda: self.expression( 5721 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5722 ) 5723 ) 5724 5725 self._retreat(index) 5726 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5727 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5728 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5729 5730 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5731 self._match(TokenType.COLUMN) 5732 column = self._parse_field(any_token=True) 5733 5734 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5735 return self.expression(exp.AlterColumn, this=column, drop=True) 5736 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5737 return self.expression(exp.AlterColumn, this=column, 
default=self._parse_conjunction()) 5738 if self._match(TokenType.COMMENT): 5739 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5740 5741 self._match_text_seq("SET", "DATA") 5742 self._match_text_seq("TYPE") 5743 return self.expression( 5744 exp.AlterColumn, 5745 this=column, 5746 dtype=self._parse_types(), 5747 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5748 using=self._match(TokenType.USING) and self._parse_conjunction(), 5749 ) 5750 5751 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5752 index = self._index - 1 5753 5754 partition_exists = self._parse_exists() 5755 if self._match(TokenType.PARTITION, advance=False): 5756 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5757 5758 self._retreat(index) 5759 return self._parse_csv(self._parse_drop_column) 5760 5761 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5762 if self._match(TokenType.COLUMN): 5763 exists = self._parse_exists() 5764 old_column = self._parse_column() 5765 to = self._match_text_seq("TO") 5766 new_column = self._parse_column() 5767 5768 if old_column is None or to is None or new_column is None: 5769 return None 5770 5771 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5772 5773 self._match_text_seq("TO") 5774 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5775 5776 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5777 start = self._prev 5778 5779 if not self._match(TokenType.TABLE): 5780 return self._parse_as_command(start) 5781 5782 exists = self._parse_exists() 5783 only = self._match_text_seq("ONLY") 5784 this = self._parse_table(schema=True) 5785 5786 if self._next: 5787 self._advance() 5788 5789 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5790 if parser: 5791 actions = ensure_list(parser(self)) 5792 options = 
self._parse_csv(self._parse_property)

        # NOTE(review): tail of _parse_alter — the method begins before this
        # chunk. Once the ALTER actions and trailing properties were parsed,
        # build an AlterTable node only if the whole statement was consumed;
        # otherwise fall back to an opaque Command below.
        if not self._curr and actions:
            return self.expression(
                exp.AlterTable,
                this=this,
                exists=exists,
                actions=actions,
                only=only,
                options=options,
            )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse a MERGE [INTO] <target> USING <source> ON <condition> statement."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        # Attach an explicit alias that follows the target table, if present.
        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse the WHEN [NOT] MATCHED [BY TARGET|SOURCE] ... THEN clauses of MERGE."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source: False for BY TARGET, True for BY SOURCE, None when omitted.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                # INSERT * or INSERT (cols) VALUES (...)
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                # UPDATE * or UPDATE SET a = b, ...
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via the dialect's SHOW_PARSERS, else as a Command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one SET item of the form <name> [= | TO] <value>; None if it doesn't match."""
        index = self._index

        # GLOBAL/SESSION TRANSACTION is a special SET form.
        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            # Roll back so the caller can try a different interpretation.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristic, ...>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Dispatch one SET item to a registered parser, else parse it as an assignment."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement; fall back to a Command if tokens remain unconsumed."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        """Parse a (possibly multi-keyword) option listed in `options` into a Var.

        Each entry maps a leading keyword to its allowed continuations. If the
        option requires a continuation and none matches, either raise or retreat
        and return None, depending on `raise_unmatched`.
        """
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            # No continuation matched: `continuations is None` means the leading
            # keyword itself wasn't a known option.
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap the raw SQL in an opaque Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # Split the leading keyword off from the rest of the statement text.
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property such as LAYOUT(...) or SOURCE(...) (ClickHouse-style)."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            # Inner parenthesized list of key/value sub-properties.
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a range property like LIFETIME(MIN x MAX y); MIN defaults to 0 if absent."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        """Parse `<expr> FOR <x> IN <iterable> [IF <cond>]`; None if IN is missing."""
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            # Also give back the FOR token consumed by the caller.
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        """Parse a heredoc string: either a HEREDOC_STRING token or a $tag$...$tag$ form."""
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        # Build the opening tag token sequence: "$", optional tag text, "$".
        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        # Scan forward until the same tag sequence closes the heredoc.
        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Look up a parser for the upcoming keyword sequence via a trie; retreat on failure."""
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        """Return True (advancing by default) if the current token is `token_type`."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            # Transfer any comments attached to the matched token.
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        """Return True (advancing by default) if the current token type is in `types`."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Return True if the next two tokens match the given types, in order."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a '(' token, raising a parse error if it is missing."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a ')' token, raising a parse error if it is missing."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        """Return True (advancing by default) if the current token's upper-cased text is in `texts`."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        """Match a sequence of upper-cased token texts; retreat fully on any mismatch."""
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return None

        # Peek-only mode: confirm the match but restore the position.
        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Rewrite columns that reference lambda parameters into plain identifiers/dots."""
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # Replace at the outermost enclosing Dot, if any.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        """Parse TRUNCATE [TABLE|DATABASE] ... with its optional modifiers."""
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        """Parse an ordered expression optionally followed by WITH <operator>."""
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a map expression from alternating key/value arguments.

    A single star argument yields a StarMap; otherwise the arguments are
    consumed pairwise (key, value, key, value, ...) into a VarMap.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys: t.List = []
    values: t.List = []
    for index in range(0, len(args), 2):
        keys.append(args[index])
        values.append(args[index + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a Log (or Ln) node from LOG(...) arguments, honoring dialect conventions."""
    base = seq_get(args, 0)
    value = seq_get(args, 1)

    if value:
        # Two-argument form: dialects disagree on whether the base comes first.
        if dialect.LOG_BASE_FIRST:
            return exp.Log(this=base, expression=value)
        return exp.Log(this=value, expression=base)

    # Single-argument form: some dialects treat LOG(x) as the natural logarithm.
    if dialect.parser_class.LOG_DEFAULTS_TO_LN:
        return exp.Ln(this=base)
    return exp.Log(this=base)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return a builder producing `expr_type` nodes whose path is dialect-normalized."""

    def _builder(args: t.List, dialect: Dialect) -> E:
        node = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )

        # JSONExtract may carry extra variadic path arguments beyond the first two.
        if expr_type is exp.JSONExtract and len(args) > 2:
            node.set("expressions", args[2:])

        return node

    return _builder
88class Parser(metaclass=_Parser): 89 """ 90 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 91 92 Args: 93 error_level: The desired error level. 94 Default: ErrorLevel.IMMEDIATE 95 error_message_context: The amount of context to capture from a query string when displaying 96 the error message (in number of characters). 97 Default: 100 98 max_errors: Maximum number of error messages to include in a raised ParseError. 99 This is only relevant if error_level is ErrorLevel.RAISE. 100 Default: 3 101 """ 102 103 FUNCTIONS: t.Dict[str, t.Callable] = { 104 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 105 "CONCAT": lambda args, dialect: exp.Concat( 106 expressions=args, 107 safe=not dialect.STRICT_STRING_CONCAT, 108 coalesce=dialect.CONCAT_COALESCE, 109 ), 110 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 111 expressions=args, 112 safe=not dialect.STRICT_STRING_CONCAT, 113 coalesce=dialect.CONCAT_COALESCE, 114 ), 115 "DATE_TO_DATE_STR": lambda args: exp.Cast( 116 this=seq_get(args, 0), 117 to=exp.DataType(this=exp.DataType.Type.TEXT), 118 ), 119 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 120 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 121 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 122 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 123 "LIKE": build_like, 124 "LOG": build_logarithm, 125 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 126 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 127 "MOD": lambda args: exp.Mod(this=seq_get(args, 0), expression=seq_get(args, 1)), 128 "TIME_TO_TIME_STR": lambda args: exp.Cast( 129 this=seq_get(args, 0), 130 to=exp.DataType(this=exp.DataType.Type.TEXT), 131 ), 132 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 133 this=exp.Cast( 134 this=seq_get(args, 
0), 135 to=exp.DataType(this=exp.DataType.Type.TEXT), 136 ), 137 start=exp.Literal.number(1), 138 length=exp.Literal.number(10), 139 ), 140 "VAR_MAP": build_var_map, 141 } 142 143 NO_PAREN_FUNCTIONS = { 144 TokenType.CURRENT_DATE: exp.CurrentDate, 145 TokenType.CURRENT_DATETIME: exp.CurrentDate, 146 TokenType.CURRENT_TIME: exp.CurrentTime, 147 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 148 TokenType.CURRENT_USER: exp.CurrentUser, 149 } 150 151 STRUCT_TYPE_TOKENS = { 152 TokenType.NESTED, 153 TokenType.OBJECT, 154 TokenType.STRUCT, 155 } 156 157 NESTED_TYPE_TOKENS = { 158 TokenType.ARRAY, 159 TokenType.LOWCARDINALITY, 160 TokenType.MAP, 161 TokenType.NULLABLE, 162 *STRUCT_TYPE_TOKENS, 163 } 164 165 ENUM_TYPE_TOKENS = { 166 TokenType.ENUM, 167 TokenType.ENUM8, 168 TokenType.ENUM16, 169 } 170 171 AGGREGATE_TYPE_TOKENS = { 172 TokenType.AGGREGATEFUNCTION, 173 TokenType.SIMPLEAGGREGATEFUNCTION, 174 } 175 176 TYPE_TOKENS = { 177 TokenType.BIT, 178 TokenType.BOOLEAN, 179 TokenType.TINYINT, 180 TokenType.UTINYINT, 181 TokenType.SMALLINT, 182 TokenType.USMALLINT, 183 TokenType.INT, 184 TokenType.UINT, 185 TokenType.BIGINT, 186 TokenType.UBIGINT, 187 TokenType.INT128, 188 TokenType.UINT128, 189 TokenType.INT256, 190 TokenType.UINT256, 191 TokenType.MEDIUMINT, 192 TokenType.UMEDIUMINT, 193 TokenType.FIXEDSTRING, 194 TokenType.FLOAT, 195 TokenType.DOUBLE, 196 TokenType.CHAR, 197 TokenType.NCHAR, 198 TokenType.VARCHAR, 199 TokenType.NVARCHAR, 200 TokenType.BPCHAR, 201 TokenType.TEXT, 202 TokenType.MEDIUMTEXT, 203 TokenType.LONGTEXT, 204 TokenType.MEDIUMBLOB, 205 TokenType.LONGBLOB, 206 TokenType.BINARY, 207 TokenType.VARBINARY, 208 TokenType.JSON, 209 TokenType.JSONB, 210 TokenType.INTERVAL, 211 TokenType.TINYBLOB, 212 TokenType.TINYTEXT, 213 TokenType.TIME, 214 TokenType.TIMETZ, 215 TokenType.TIMESTAMP, 216 TokenType.TIMESTAMP_S, 217 TokenType.TIMESTAMP_MS, 218 TokenType.TIMESTAMP_NS, 219 TokenType.TIMESTAMPTZ, 220 TokenType.TIMESTAMPLTZ, 221 TokenType.DATETIME, 222 
TokenType.DATETIME64, 223 TokenType.DATE, 224 TokenType.DATE32, 225 TokenType.INT4RANGE, 226 TokenType.INT4MULTIRANGE, 227 TokenType.INT8RANGE, 228 TokenType.INT8MULTIRANGE, 229 TokenType.NUMRANGE, 230 TokenType.NUMMULTIRANGE, 231 TokenType.TSRANGE, 232 TokenType.TSMULTIRANGE, 233 TokenType.TSTZRANGE, 234 TokenType.TSTZMULTIRANGE, 235 TokenType.DATERANGE, 236 TokenType.DATEMULTIRANGE, 237 TokenType.DECIMAL, 238 TokenType.UDECIMAL, 239 TokenType.BIGDECIMAL, 240 TokenType.UUID, 241 TokenType.GEOGRAPHY, 242 TokenType.GEOMETRY, 243 TokenType.HLLSKETCH, 244 TokenType.HSTORE, 245 TokenType.PSEUDO_TYPE, 246 TokenType.SUPER, 247 TokenType.SERIAL, 248 TokenType.SMALLSERIAL, 249 TokenType.BIGSERIAL, 250 TokenType.XML, 251 TokenType.YEAR, 252 TokenType.UNIQUEIDENTIFIER, 253 TokenType.USERDEFINED, 254 TokenType.MONEY, 255 TokenType.SMALLMONEY, 256 TokenType.ROWVERSION, 257 TokenType.IMAGE, 258 TokenType.VARIANT, 259 TokenType.OBJECT, 260 TokenType.OBJECT_IDENTIFIER, 261 TokenType.INET, 262 TokenType.IPADDRESS, 263 TokenType.IPPREFIX, 264 TokenType.IPV4, 265 TokenType.IPV6, 266 TokenType.UNKNOWN, 267 TokenType.NULL, 268 TokenType.NAME, 269 *ENUM_TYPE_TOKENS, 270 *NESTED_TYPE_TOKENS, 271 *AGGREGATE_TYPE_TOKENS, 272 } 273 274 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 275 TokenType.BIGINT: TokenType.UBIGINT, 276 TokenType.INT: TokenType.UINT, 277 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 278 TokenType.SMALLINT: TokenType.USMALLINT, 279 TokenType.TINYINT: TokenType.UTINYINT, 280 TokenType.DECIMAL: TokenType.UDECIMAL, 281 } 282 283 SUBQUERY_PREDICATES = { 284 TokenType.ANY: exp.Any, 285 TokenType.ALL: exp.All, 286 TokenType.EXISTS: exp.Exists, 287 TokenType.SOME: exp.Any, 288 } 289 290 RESERVED_TOKENS = { 291 *Tokenizer.SINGLE_TOKENS.values(), 292 TokenType.SELECT, 293 } 294 295 DB_CREATABLES = { 296 TokenType.DATABASE, 297 TokenType.SCHEMA, 298 TokenType.TABLE, 299 TokenType.VIEW, 300 TokenType.MODEL, 301 TokenType.DICTIONARY, 302 TokenType.SEQUENCE, 303 TokenType.STORAGE_INTEGRATION, 304 
} 305 306 CREATABLES = { 307 TokenType.COLUMN, 308 TokenType.CONSTRAINT, 309 TokenType.FUNCTION, 310 TokenType.INDEX, 311 TokenType.PROCEDURE, 312 TokenType.FOREIGN_KEY, 313 *DB_CREATABLES, 314 } 315 316 # Tokens that can represent identifiers 317 ID_VAR_TOKENS = { 318 TokenType.VAR, 319 TokenType.ANTI, 320 TokenType.APPLY, 321 TokenType.ASC, 322 TokenType.ASOF, 323 TokenType.AUTO_INCREMENT, 324 TokenType.BEGIN, 325 TokenType.BPCHAR, 326 TokenType.CACHE, 327 TokenType.CASE, 328 TokenType.COLLATE, 329 TokenType.COMMAND, 330 TokenType.COMMENT, 331 TokenType.COMMIT, 332 TokenType.CONSTRAINT, 333 TokenType.DEFAULT, 334 TokenType.DELETE, 335 TokenType.DESC, 336 TokenType.DESCRIBE, 337 TokenType.DICTIONARY, 338 TokenType.DIV, 339 TokenType.END, 340 TokenType.EXECUTE, 341 TokenType.ESCAPE, 342 TokenType.FALSE, 343 TokenType.FIRST, 344 TokenType.FILTER, 345 TokenType.FINAL, 346 TokenType.FORMAT, 347 TokenType.FULL, 348 TokenType.IS, 349 TokenType.ISNULL, 350 TokenType.INTERVAL, 351 TokenType.KEEP, 352 TokenType.KILL, 353 TokenType.LEFT, 354 TokenType.LOAD, 355 TokenType.MERGE, 356 TokenType.NATURAL, 357 TokenType.NEXT, 358 TokenType.OFFSET, 359 TokenType.OPERATOR, 360 TokenType.ORDINALITY, 361 TokenType.OVERLAPS, 362 TokenType.OVERWRITE, 363 TokenType.PARTITION, 364 TokenType.PERCENT, 365 TokenType.PIVOT, 366 TokenType.PRAGMA, 367 TokenType.RANGE, 368 TokenType.RECURSIVE, 369 TokenType.REFERENCES, 370 TokenType.REFRESH, 371 TokenType.REPLACE, 372 TokenType.RIGHT, 373 TokenType.ROW, 374 TokenType.ROWS, 375 TokenType.SEMI, 376 TokenType.SET, 377 TokenType.SETTINGS, 378 TokenType.SHOW, 379 TokenType.TEMPORARY, 380 TokenType.TOP, 381 TokenType.TRUE, 382 TokenType.TRUNCATE, 383 TokenType.UNIQUE, 384 TokenType.UNPIVOT, 385 TokenType.UPDATE, 386 TokenType.USE, 387 TokenType.VOLATILE, 388 TokenType.WINDOW, 389 *CREATABLES, 390 *SUBQUERY_PREDICATES, 391 *TYPE_TOKENS, 392 *NO_PAREN_FUNCTIONS, 393 } 394 395 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 396 397 TABLE_ALIAS_TOKENS = 
ID_VAR_TOKENS - { 398 TokenType.ANTI, 399 TokenType.APPLY, 400 TokenType.ASOF, 401 TokenType.FULL, 402 TokenType.LEFT, 403 TokenType.LOCK, 404 TokenType.NATURAL, 405 TokenType.OFFSET, 406 TokenType.RIGHT, 407 TokenType.SEMI, 408 TokenType.WINDOW, 409 } 410 411 ALIAS_TOKENS = ID_VAR_TOKENS 412 413 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 414 415 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 416 417 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 418 419 FUNC_TOKENS = { 420 TokenType.COLLATE, 421 TokenType.COMMAND, 422 TokenType.CURRENT_DATE, 423 TokenType.CURRENT_DATETIME, 424 TokenType.CURRENT_TIMESTAMP, 425 TokenType.CURRENT_TIME, 426 TokenType.CURRENT_USER, 427 TokenType.FILTER, 428 TokenType.FIRST, 429 TokenType.FORMAT, 430 TokenType.GLOB, 431 TokenType.IDENTIFIER, 432 TokenType.INDEX, 433 TokenType.ISNULL, 434 TokenType.ILIKE, 435 TokenType.INSERT, 436 TokenType.LIKE, 437 TokenType.MERGE, 438 TokenType.OFFSET, 439 TokenType.PRIMARY_KEY, 440 TokenType.RANGE, 441 TokenType.REPLACE, 442 TokenType.RLIKE, 443 TokenType.ROW, 444 TokenType.UNNEST, 445 TokenType.VAR, 446 TokenType.LEFT, 447 TokenType.RIGHT, 448 TokenType.SEQUENCE, 449 TokenType.DATE, 450 TokenType.DATETIME, 451 TokenType.TABLE, 452 TokenType.TIMESTAMP, 453 TokenType.TIMESTAMPTZ, 454 TokenType.TRUNCATE, 455 TokenType.WINDOW, 456 TokenType.XOR, 457 *TYPE_TOKENS, 458 *SUBQUERY_PREDICATES, 459 } 460 461 CONJUNCTION = { 462 TokenType.AND: exp.And, 463 TokenType.OR: exp.Or, 464 } 465 466 EQUALITY = { 467 TokenType.COLON_EQ: exp.PropertyEQ, 468 TokenType.EQ: exp.EQ, 469 TokenType.NEQ: exp.NEQ, 470 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 471 } 472 473 COMPARISON = { 474 TokenType.GT: exp.GT, 475 TokenType.GTE: exp.GTE, 476 TokenType.LT: exp.LT, 477 TokenType.LTE: exp.LTE, 478 } 479 480 BITWISE = { 481 TokenType.AMP: exp.BitwiseAnd, 482 TokenType.CARET: exp.BitwiseXor, 483 TokenType.PIPE: exp.BitwiseOr, 484 } 485 486 TERM = { 487 TokenType.DASH: exp.Sub, 488 TokenType.PLUS: 
exp.Add, 489 TokenType.MOD: exp.Mod, 490 TokenType.COLLATE: exp.Collate, 491 } 492 493 FACTOR = { 494 TokenType.DIV: exp.IntDiv, 495 TokenType.LR_ARROW: exp.Distance, 496 TokenType.SLASH: exp.Div, 497 TokenType.STAR: exp.Mul, 498 } 499 500 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 501 502 TIMES = { 503 TokenType.TIME, 504 TokenType.TIMETZ, 505 } 506 507 TIMESTAMPS = { 508 TokenType.TIMESTAMP, 509 TokenType.TIMESTAMPTZ, 510 TokenType.TIMESTAMPLTZ, 511 *TIMES, 512 } 513 514 SET_OPERATIONS = { 515 TokenType.UNION, 516 TokenType.INTERSECT, 517 TokenType.EXCEPT, 518 } 519 520 JOIN_METHODS = { 521 TokenType.ASOF, 522 TokenType.NATURAL, 523 TokenType.POSITIONAL, 524 } 525 526 JOIN_SIDES = { 527 TokenType.LEFT, 528 TokenType.RIGHT, 529 TokenType.FULL, 530 } 531 532 JOIN_KINDS = { 533 TokenType.INNER, 534 TokenType.OUTER, 535 TokenType.CROSS, 536 TokenType.SEMI, 537 TokenType.ANTI, 538 } 539 540 JOIN_HINTS: t.Set[str] = set() 541 542 LAMBDAS = { 543 TokenType.ARROW: lambda self, expressions: self.expression( 544 exp.Lambda, 545 this=self._replace_lambda( 546 self._parse_conjunction(), 547 {node.name for node in expressions}, 548 ), 549 expressions=expressions, 550 ), 551 TokenType.FARROW: lambda self, expressions: self.expression( 552 exp.Kwarg, 553 this=exp.var(expressions[0].name), 554 expression=self._parse_conjunction(), 555 ), 556 } 557 558 COLUMN_OPERATORS = { 559 TokenType.DOT: None, 560 TokenType.DCOLON: lambda self, this, to: self.expression( 561 exp.Cast if self.STRICT_CAST else exp.TryCast, 562 this=this, 563 to=to, 564 ), 565 TokenType.ARROW: lambda self, this, path: self.expression( 566 exp.JSONExtract, 567 this=this, 568 expression=self.dialect.to_json_path(path), 569 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 570 ), 571 TokenType.DARROW: lambda self, this, path: self.expression( 572 exp.JSONExtractScalar, 573 this=this, 574 expression=self.dialect.to_json_path(path), 575 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 576 ), 577 
TokenType.HASH_ARROW: lambda self, this, path: self.expression( 578 exp.JSONBExtract, 579 this=this, 580 expression=path, 581 ), 582 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 583 exp.JSONBExtractScalar, 584 this=this, 585 expression=path, 586 ), 587 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 588 exp.JSONBContains, 589 this=this, 590 expression=key, 591 ), 592 } 593 594 EXPRESSION_PARSERS = { 595 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 596 exp.Column: lambda self: self._parse_column(), 597 exp.Condition: lambda self: self._parse_conjunction(), 598 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 599 exp.Expression: lambda self: self._parse_expression(), 600 exp.From: lambda self: self._parse_from(), 601 exp.Group: lambda self: self._parse_group(), 602 exp.Having: lambda self: self._parse_having(), 603 exp.Identifier: lambda self: self._parse_id_var(), 604 exp.Join: lambda self: self._parse_join(), 605 exp.Lambda: lambda self: self._parse_lambda(), 606 exp.Lateral: lambda self: self._parse_lateral(), 607 exp.Limit: lambda self: self._parse_limit(), 608 exp.Offset: lambda self: self._parse_offset(), 609 exp.Order: lambda self: self._parse_order(), 610 exp.Ordered: lambda self: self._parse_ordered(), 611 exp.Properties: lambda self: self._parse_properties(), 612 exp.Qualify: lambda self: self._parse_qualify(), 613 exp.Returning: lambda self: self._parse_returning(), 614 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 615 exp.Table: lambda self: self._parse_table_parts(), 616 exp.TableAlias: lambda self: self._parse_table_alias(), 617 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 618 exp.Where: lambda self: self._parse_where(), 619 exp.Window: lambda self: self._parse_named_window(), 620 exp.With: lambda self: self._parse_with(), 621 "JOIN_TYPE": lambda self: self._parse_join_parts(), 622 } 623 624 STATEMENT_PARSERS = { 625 
TokenType.ALTER: lambda self: self._parse_alter(), 626 TokenType.BEGIN: lambda self: self._parse_transaction(), 627 TokenType.CACHE: lambda self: self._parse_cache(), 628 TokenType.COMMENT: lambda self: self._parse_comment(), 629 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 630 TokenType.CREATE: lambda self: self._parse_create(), 631 TokenType.DELETE: lambda self: self._parse_delete(), 632 TokenType.DESC: lambda self: self._parse_describe(), 633 TokenType.DESCRIBE: lambda self: self._parse_describe(), 634 TokenType.DROP: lambda self: self._parse_drop(), 635 TokenType.INSERT: lambda self: self._parse_insert(), 636 TokenType.KILL: lambda self: self._parse_kill(), 637 TokenType.LOAD: lambda self: self._parse_load(), 638 TokenType.MERGE: lambda self: self._parse_merge(), 639 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 640 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 641 TokenType.REFRESH: lambda self: self._parse_refresh(), 642 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 643 TokenType.SET: lambda self: self._parse_set(), 644 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 645 TokenType.UNCACHE: lambda self: self._parse_uncache(), 646 TokenType.UPDATE: lambda self: self._parse_update(), 647 TokenType.USE: lambda self: self.expression( 648 exp.Use, 649 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 650 this=self._parse_table(schema=False), 651 ), 652 } 653 654 UNARY_PARSERS = { 655 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 656 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 657 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 658 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 659 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 660 
TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 661 } 662 663 STRING_PARSERS = { 664 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 665 exp.RawString, this=token.text 666 ), 667 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 668 exp.National, this=token.text 669 ), 670 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 671 TokenType.STRING: lambda self, token: self.expression( 672 exp.Literal, this=token.text, is_string=True 673 ), 674 TokenType.UNICODE_STRING: lambda self, token: self.expression( 675 exp.UnicodeString, 676 this=token.text, 677 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 678 ), 679 } 680 681 NUMERIC_PARSERS = { 682 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 683 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 684 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 685 TokenType.NUMBER: lambda self, token: self.expression( 686 exp.Literal, this=token.text, is_string=False 687 ), 688 } 689 690 PRIMARY_PARSERS = { 691 **STRING_PARSERS, 692 **NUMERIC_PARSERS, 693 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 694 TokenType.NULL: lambda self, _: self.expression(exp.Null), 695 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 696 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 697 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 698 TokenType.STAR: lambda self, _: self.expression( 699 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 700 ), 701 } 702 703 PLACEHOLDER_PARSERS = { 704 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 705 TokenType.PARAMETER: lambda self: self._parse_parameter(), 706 TokenType.COLON: lambda self: ( 707 
self.expression(exp.Placeholder, this=self._prev.text) 708 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 709 else None 710 ), 711 } 712 713 RANGE_PARSERS = { 714 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 715 TokenType.GLOB: binary_range_parser(exp.Glob), 716 TokenType.ILIKE: binary_range_parser(exp.ILike), 717 TokenType.IN: lambda self, this: self._parse_in(this), 718 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 719 TokenType.IS: lambda self, this: self._parse_is(this), 720 TokenType.LIKE: binary_range_parser(exp.Like), 721 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 722 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 723 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 724 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 725 } 726 727 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 728 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 729 "AUTO": lambda self: self._parse_auto_property(), 730 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 731 "BACKUP": lambda self: self.expression( 732 exp.BackupProperty, this=self._parse_var(any_token=True) 733 ), 734 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 735 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 736 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 737 "CHECKSUM": lambda self: self._parse_checksum(), 738 "CLUSTER BY": lambda self: self._parse_cluster(), 739 "CLUSTERED": lambda self: self._parse_clustered_by(), 740 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 741 exp.CollateProperty, **kwargs 742 ), 743 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 744 "CONTAINS": lambda self: self._parse_contains_property(), 745 "COPY": lambda self: self._parse_copy_property(), 746 "DATABLOCKSIZE": lambda self, **kwargs: 
self._parse_datablocksize(**kwargs), 747 "DEFINER": lambda self: self._parse_definer(), 748 "DETERMINISTIC": lambda self: self.expression( 749 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 750 ), 751 "DISTKEY": lambda self: self._parse_distkey(), 752 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 753 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 754 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 755 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 756 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 757 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 758 "FREESPACE": lambda self: self._parse_freespace(), 759 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 760 "HEAP": lambda self: self.expression(exp.HeapProperty), 761 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 762 "IMMUTABLE": lambda self: self.expression( 763 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 764 ), 765 "INHERITS": lambda self: self.expression( 766 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 767 ), 768 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 769 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 770 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 771 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 772 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 773 "LIKE": lambda self: self._parse_create_like(), 774 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 775 "LOCK": lambda self: self._parse_locking(), 776 "LOCKING": lambda self: self._parse_locking(), 777 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 778 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 779 
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        # The three PARTITION[ED] BY spellings are dialect variants of the same clause
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Maps a column-constraint keyword (as seen after a column definition) to the parser
    # that consumes the rest of that constraint.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_conjunction),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON UPDATE <fn> becomes OnUpdateColumnConstraint; a bare ON <id> becomes OnProperty
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    # Maps the keyword following ALTER TABLE <name> to the parser for that action.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
"DROP": lambda self: self._parse_alter_table_drop(), 895 "RENAME": lambda self: self._parse_alter_table_rename(), 896 } 897 898 SCHEMA_UNNAMED_CONSTRAINTS = { 899 "CHECK", 900 "EXCLUDE", 901 "FOREIGN KEY", 902 "LIKE", 903 "PERIOD", 904 "PRIMARY KEY", 905 "UNIQUE", 906 } 907 908 NO_PAREN_FUNCTION_PARSERS = { 909 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 910 "CASE": lambda self: self._parse_case(), 911 "IF": lambda self: self._parse_if(), 912 "NEXT": lambda self: self._parse_next_value_for(), 913 } 914 915 INVALID_FUNC_NAME_TOKENS = { 916 TokenType.IDENTIFIER, 917 TokenType.STRING, 918 } 919 920 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 921 922 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 923 924 FUNCTION_PARSERS = { 925 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 926 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 927 "DECODE": lambda self: self._parse_decode(), 928 "EXTRACT": lambda self: self._parse_extract(), 929 "JSON_OBJECT": lambda self: self._parse_json_object(), 930 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 931 "JSON_TABLE": lambda self: self._parse_json_table(), 932 "MATCH": lambda self: self._parse_match_against(), 933 "OPENJSON": lambda self: self._parse_open_json(), 934 "POSITION": lambda self: self._parse_position(), 935 "PREDICT": lambda self: self._parse_predict(), 936 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 937 "STRING_AGG": lambda self: self._parse_string_agg(), 938 "SUBSTRING": lambda self: self._parse_substring(), 939 "TRIM": lambda self: self._parse_trim(), 940 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 941 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 942 } 943 944 QUERY_MODIFIER_PARSERS = { 945 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 946 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 947 TokenType.WHERE: lambda 
self: ("where", self._parse_where()), 948 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 949 TokenType.HAVING: lambda self: ("having", self._parse_having()), 950 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 951 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 952 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 953 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 954 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 955 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 956 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 957 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 958 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 959 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 960 TokenType.CLUSTER_BY: lambda self: ( 961 "cluster", 962 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 963 ), 964 TokenType.DISTRIBUTE_BY: lambda self: ( 965 "distribute", 966 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 967 ), 968 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 969 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 970 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 971 } 972 973 SET_PARSERS = { 974 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 975 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 976 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 977 "TRANSACTION": lambda self: self._parse_set_transaction(), 978 } 979 980 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 981 982 TYPE_LITERAL_PARSERS = { 983 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 984 } 985 986 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, 
TokenType.L_PAREN} 987 988 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 989 990 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 991 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 992 "ISOLATION": ( 993 ("LEVEL", "REPEATABLE", "READ"), 994 ("LEVEL", "READ", "COMMITTED"), 995 ("LEVEL", "READ", "UNCOMITTED"), 996 ("LEVEL", "SERIALIZABLE"), 997 ), 998 "READ": ("WRITE", "ONLY"), 999 } 1000 1001 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1002 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1003 ) 1004 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1005 1006 CREATE_SEQUENCE: OPTIONS_TYPE = { 1007 "SCALE": ("EXTEND", "NOEXTEND"), 1008 "SHARD": ("EXTEND", "NOEXTEND"), 1009 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1010 **dict.fromkeys( 1011 ( 1012 "SESSION", 1013 "GLOBAL", 1014 "KEEP", 1015 "NOKEEP", 1016 "ORDER", 1017 "NOORDER", 1018 "NOCACHE", 1019 "CYCLE", 1020 "NOCYCLE", 1021 "NOMINVALUE", 1022 "NOMAXVALUE", 1023 "NOSCALE", 1024 "NOSHARD", 1025 ), 1026 tuple(), 1027 ), 1028 } 1029 1030 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1031 1032 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1033 1034 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1035 1036 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1037 1038 CLONE_KEYWORDS = {"CLONE", "COPY"} 1039 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1040 1041 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1042 1043 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1044 1045 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1046 1047 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1048 1049 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1050 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1051 WINDOW_SIDES = {"FOLLOWING", 
"PRECEDING"} 1052 1053 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1054 1055 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1056 1057 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 1058 1059 DISTINCT_TOKENS = {TokenType.DISTINCT} 1060 1061 NULL_TOKENS = {TokenType.NULL} 1062 1063 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1064 1065 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1066 1067 STRICT_CAST = True 1068 1069 PREFIXED_PIVOT_COLUMNS = False 1070 IDENTIFY_PIVOT_STRINGS = False 1071 1072 LOG_DEFAULTS_TO_LN = False 1073 1074 # Whether ADD is present for each column added by ALTER TABLE 1075 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1076 1077 # Whether the table sample clause expects CSV syntax 1078 TABLESAMPLE_CSV = False 1079 1080 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1081 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1082 1083 # Whether the TRIM function expects the characters to trim as its first argument 1084 TRIM_PATTERN_FIRST = False 1085 1086 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1087 STRING_ALIASES = False 1088 1089 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1090 MODIFIERS_ATTACHED_TO_UNION = True 1091 UNION_MODIFIERS = {"order", "limit", "offset"} 1092 1093 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1094 NO_PAREN_IF_COMMANDS = True 1095 1096 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1097 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1098 1099 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 
1100 # If this is True and '(' is not found, the keyword will be treated as an identifier 1101 VALUES_FOLLOWED_BY_PAREN = True 1102 1103 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1104 SUPPORTS_IMPLICIT_UNNEST = False 1105 1106 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1107 INTERVAL_SPANS = True 1108 1109 __slots__ = ( 1110 "error_level", 1111 "error_message_context", 1112 "max_errors", 1113 "dialect", 1114 "sql", 1115 "errors", 1116 "_tokens", 1117 "_index", 1118 "_curr", 1119 "_next", 1120 "_prev", 1121 "_prev_comments", 1122 ) 1123 1124 # Autofilled 1125 SHOW_TRIE: t.Dict = {} 1126 SET_TRIE: t.Dict = {} 1127 1128 def __init__( 1129 self, 1130 error_level: t.Optional[ErrorLevel] = None, 1131 error_message_context: int = 100, 1132 max_errors: int = 3, 1133 dialect: DialectType = None, 1134 ): 1135 from sqlglot.dialects import Dialect 1136 1137 self.error_level = error_level or ErrorLevel.IMMEDIATE 1138 self.error_message_context = error_message_context 1139 self.max_errors = max_errors 1140 self.dialect = Dialect.get_or_raise(dialect) 1141 self.reset() 1142 1143 def reset(self): 1144 self.sql = "" 1145 self.errors = [] 1146 self._tokens = [] 1147 self._index = 0 1148 self._curr = None 1149 self._next = None 1150 self._prev = None 1151 self._prev_comments = None 1152 1153 def parse( 1154 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1155 ) -> t.List[t.Optional[exp.Expression]]: 1156 """ 1157 Parses a list of tokens and returns a list of syntax trees, one tree 1158 per parsed SQL statement. 1159 1160 Args: 1161 raw_tokens: The list of tokens. 1162 sql: The original SQL string, used to produce helpful debug messages. 1163 1164 Returns: 1165 The list of the produced syntax trees. 
1166 """ 1167 return self._parse( 1168 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1169 ) 1170 1171 def parse_into( 1172 self, 1173 expression_types: exp.IntoType, 1174 raw_tokens: t.List[Token], 1175 sql: t.Optional[str] = None, 1176 ) -> t.List[t.Optional[exp.Expression]]: 1177 """ 1178 Parses a list of tokens into a given Expression type. If a collection of Expression 1179 types is given instead, this method will try to parse the token list into each one 1180 of them, stopping at the first for which the parsing succeeds. 1181 1182 Args: 1183 expression_types: The expression type(s) to try and parse the token list into. 1184 raw_tokens: The list of tokens. 1185 sql: The original SQL string, used to produce helpful debug messages. 1186 1187 Returns: 1188 The target Expression. 1189 """ 1190 errors = [] 1191 for expression_type in ensure_list(expression_types): 1192 parser = self.EXPRESSION_PARSERS.get(expression_type) 1193 if not parser: 1194 raise TypeError(f"No parser registered for {expression_type}") 1195 1196 try: 1197 return self._parse(parser, raw_tokens, sql) 1198 except ParseError as e: 1199 e.errors[0]["into_expression"] = expression_type 1200 errors.append(e) 1201 1202 raise ParseError( 1203 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1204 errors=merge_errors(errors), 1205 ) from errors[-1] 1206 1207 def _parse( 1208 self, 1209 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1210 raw_tokens: t.List[Token], 1211 sql: t.Optional[str] = None, 1212 ) -> t.List[t.Optional[exp.Expression]]: 1213 self.reset() 1214 self.sql = sql or "" 1215 1216 total = len(raw_tokens) 1217 chunks: t.List[t.List[Token]] = [[]] 1218 1219 for i, token in enumerate(raw_tokens): 1220 if token.token_type == TokenType.SEMICOLON: 1221 if i < total - 1: 1222 chunks.append([]) 1223 else: 1224 chunks[-1].append(token) 1225 1226 expressions = [] 1227 1228 for tokens in chunks: 1229 self._index = -1 1230 self._tokens = 
tokens 1231 self._advance() 1232 1233 expressions.append(parse_method(self)) 1234 1235 if self._index < len(self._tokens): 1236 self.raise_error("Invalid expression / Unexpected token") 1237 1238 self.check_errors() 1239 1240 return expressions 1241 1242 def check_errors(self) -> None: 1243 """Logs or raises any found errors, depending on the chosen error level setting.""" 1244 if self.error_level == ErrorLevel.WARN: 1245 for error in self.errors: 1246 logger.error(str(error)) 1247 elif self.error_level == ErrorLevel.RAISE and self.errors: 1248 raise ParseError( 1249 concat_messages(self.errors, self.max_errors), 1250 errors=merge_errors(self.errors), 1251 ) 1252 1253 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1254 """ 1255 Appends an error in the list of recorded errors or raises it, depending on the chosen 1256 error level setting. 1257 """ 1258 token = token or self._curr or self._prev or Token.string("") 1259 start = token.start 1260 end = token.end + 1 1261 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1262 highlight = self.sql[start:end] 1263 end_context = self.sql[end : end + self.error_message_context] 1264 1265 error = ParseError.new( 1266 f"{message}. Line {token.line}, Col: {token.col}.\n" 1267 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1268 description=message, 1269 line=token.line, 1270 col=token.col, 1271 start_context=start_context, 1272 highlight=highlight, 1273 end_context=end_context, 1274 ) 1275 1276 if self.error_level == ErrorLevel.IMMEDIATE: 1277 raise error 1278 1279 self.errors.append(error) 1280 1281 def expression( 1282 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1283 ) -> E: 1284 """ 1285 Creates a new, validated Expression. 1286 1287 Args: 1288 exp_class: The expression class to instantiate. 1289 comments: An optional list of comments to attach to the expression. 
1290 kwargs: The arguments to set for the expression along with their respective values. 1291 1292 Returns: 1293 The target expression. 1294 """ 1295 instance = exp_class(**kwargs) 1296 instance.add_comments(comments) if comments else self._add_comments(instance) 1297 return self.validate_expression(instance) 1298 1299 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1300 if expression and self._prev_comments: 1301 expression.add_comments(self._prev_comments) 1302 self._prev_comments = None 1303 1304 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1305 """ 1306 Validates an Expression, making sure that all its mandatory arguments are set. 1307 1308 Args: 1309 expression: The expression to validate. 1310 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1311 1312 Returns: 1313 The validated expression. 1314 """ 1315 if self.error_level != ErrorLevel.IGNORE: 1316 for error_message in expression.error_messages(args): 1317 self.raise_error(error_message) 1318 1319 return expression 1320 1321 def _find_sql(self, start: Token, end: Token) -> str: 1322 return self.sql[start.start : end.end + 1] 1323 1324 def _is_connected(self) -> bool: 1325 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1326 1327 def _advance(self, times: int = 1) -> None: 1328 self._index += times 1329 self._curr = seq_get(self._tokens, self._index) 1330 self._next = seq_get(self._tokens, self._index + 1) 1331 1332 if self._index > 0: 1333 self._prev = self._tokens[self._index - 1] 1334 self._prev_comments = self._prev.comments 1335 else: 1336 self._prev = None 1337 self._prev_comments = None 1338 1339 def _retreat(self, index: int) -> None: 1340 if index != self._index: 1341 self._advance(index - self._index) 1342 1343 def _warn_unsupported(self) -> None: 1344 if len(self._tokens) <= 1: 1345 return 1346 1347 # We use _find_sql because self.sql may comprise multiple chunks, 
and we're only 1348 # interested in emitting a warning for the one being currently processed. 1349 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1350 1351 logger.warning( 1352 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1353 ) 1354 1355 def _parse_command(self) -> exp.Command: 1356 self._warn_unsupported() 1357 return self.expression( 1358 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1359 ) 1360 1361 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1362 """ 1363 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. This behavior can 1364 be different depending on the uset-set ErrorLevel, so _try_parse aims to solve this by setting & resetting 1365 the parser state accordingly 1366 """ 1367 index = self._index 1368 error_level = self.error_level 1369 1370 self.error_level = ErrorLevel.IMMEDIATE 1371 try: 1372 this = parse_method() 1373 except ParseError: 1374 this = None 1375 finally: 1376 if not this or retreat: 1377 self._retreat(index) 1378 self.error_level = error_level 1379 1380 return this 1381 1382 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1383 start = self._prev 1384 exists = self._parse_exists() if allow_exists else None 1385 1386 self._match(TokenType.ON) 1387 1388 kind = self._match_set(self.CREATABLES) and self._prev 1389 if not kind: 1390 return self._parse_as_command(start) 1391 1392 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1393 this = self._parse_user_defined_function(kind=kind.token_type) 1394 elif kind.token_type == TokenType.TABLE: 1395 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1396 elif kind.token_type == TokenType.COLUMN: 1397 this = self._parse_column() 1398 else: 1399 this = self._parse_id_var() 1400 1401 self._match(TokenType.IS) 1402 1403 return self.expression( 1404 
exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1405 ) 1406 1407 def _parse_to_table( 1408 self, 1409 ) -> exp.ToTableProperty: 1410 table = self._parse_table_parts(schema=True) 1411 return self.expression(exp.ToTableProperty, this=table) 1412 1413 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1414 def _parse_ttl(self) -> exp.Expression: 1415 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1416 this = self._parse_bitwise() 1417 1418 if self._match_text_seq("DELETE"): 1419 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1420 if self._match_text_seq("RECOMPRESS"): 1421 return self.expression( 1422 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1423 ) 1424 if self._match_text_seq("TO", "DISK"): 1425 return self.expression( 1426 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1427 ) 1428 if self._match_text_seq("TO", "VOLUME"): 1429 return self.expression( 1430 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1431 ) 1432 1433 return this 1434 1435 expressions = self._parse_csv(_parse_ttl_action) 1436 where = self._parse_where() 1437 group = self._parse_group() 1438 1439 aggregates = None 1440 if group and self._match(TokenType.SET): 1441 aggregates = self._parse_csv(self._parse_set_item) 1442 1443 return self.expression( 1444 exp.MergeTreeTTL, 1445 expressions=expressions, 1446 where=where, 1447 group=group, 1448 aggregates=aggregates, 1449 ) 1450 1451 def _parse_statement(self) -> t.Optional[exp.Expression]: 1452 if self._curr is None: 1453 return None 1454 1455 if self._match_set(self.STATEMENT_PARSERS): 1456 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1457 1458 if self._match_set(Tokenizer.COMMANDS): 1459 return self._parse_command() 1460 1461 expression = self._parse_expression() 1462 expression = self._parse_set_operations(expression) if expression else self._parse_select() 
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        # `start` anchors the fallback command parse if the DROP target isn't a creatable
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; returns a truthy value only when the full sequence matched
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Parses CREATE [OR REPLACE] <kind> ...; falls back to a generic Command when the
        # statement can't be fully understood. Matching order is significant throughout.
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at different clause positions into one node
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        # Anything left unconsumed means we didn't fully understand the statement
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        # Consumes CREATE SEQUENCE options; returns None when nothing was consumed
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Collect optional modifier keywords that may precede the property name
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX",
"MAXIMUM")), 1698 } 1699 1700 if self._match_texts(self.PROPERTY_PARSERS): 1701 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1702 try: 1703 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1704 except TypeError: 1705 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1706 1707 return None 1708 1709 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1710 return self._parse_wrapped_csv(self._parse_property) 1711 1712 def _parse_property(self) -> t.Optional[exp.Expression]: 1713 if self._match_texts(self.PROPERTY_PARSERS): 1714 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1715 1716 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1717 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1718 1719 if self._match_text_seq("COMPOUND", "SORTKEY"): 1720 return self._parse_sortkey(compound=True) 1721 1722 if self._match_text_seq("SQL", "SECURITY"): 1723 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1724 1725 index = self._index 1726 key = self._parse_column() 1727 1728 if not self._match(TokenType.EQ): 1729 self._retreat(index) 1730 return self._parse_sequence_properties() 1731 1732 return self.expression( 1733 exp.Property, 1734 this=key.to_dot() if isinstance(key, exp.Column) else key, 1735 value=self._parse_bitwise() or self._parse_var(any_token=True), 1736 ) 1737 1738 def _parse_stored(self) -> exp.FileFormatProperty: 1739 self._match(TokenType.ALIAS) 1740 1741 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1742 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1743 1744 return self.expression( 1745 exp.FileFormatProperty, 1746 this=( 1747 self.expression( 1748 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1749 ) 1750 if input_format or output_format 1751 else self._parse_var_or_string() or 
self._parse_number() or self._parse_id_var() 1752 ), 1753 ) 1754 1755 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1756 self._match(TokenType.EQ) 1757 self._match(TokenType.ALIAS) 1758 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1759 1760 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1761 properties = [] 1762 while True: 1763 if before: 1764 prop = self._parse_property_before() 1765 else: 1766 prop = self._parse_property() 1767 if not prop: 1768 break 1769 for p in ensure_list(prop): 1770 properties.append(p) 1771 1772 if properties: 1773 return self.expression(exp.Properties, expressions=properties) 1774 1775 return None 1776 1777 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1778 return self.expression( 1779 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1780 ) 1781 1782 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1783 if self._index >= 2: 1784 pre_volatile_token = self._tokens[self._index - 2] 1785 else: 1786 pre_volatile_token = None 1787 1788 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1789 return exp.VolatileProperty() 1790 1791 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1792 1793 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1794 self._match_pair(TokenType.EQ, TokenType.ON) 1795 1796 prop = self.expression(exp.WithSystemVersioningProperty) 1797 if self._match(TokenType.L_PAREN): 1798 self._match_text_seq("HISTORY_TABLE", "=") 1799 prop.set("this", self._parse_table_parts()) 1800 1801 if self._match(TokenType.COMMA): 1802 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1803 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1804 1805 self._match_r_paren() 1806 1807 return prop 1808 1809 def _parse_with_property(self) -> 
t.Optional[exp.Expression] | t.List[exp.Expression]:
        # A parenthesized list means a generic wrapped property list.
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parse DEFINER = user@host; returns None if either part is missing."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parse WITH JOURNAL [TABLE] [=] <table>."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        """Wrap a [NO] LOG property."""
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Wrap a JOURNAL property with the modifier kwargs already collected."""
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parse CHECKSUM [=] ON|OFF|DEFAULT."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        """Parse a CLUSTER BY expression list, optionally parenthesized."""
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse CLUSTERED BY (cols) [SORTED BY (...)] INTO <n> BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        """Parse COPY GRANTS; rewinds the already-consumed COPY token on failure."""
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parse FREESPACE [=] <number> [PERCENT]."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parse MERGEBLOCKRATIO, either with an explicit `= n [PERCENT]` or bare."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parse DATABLOCKSIZE [=] <n> [BYTES|KBYTES|KILOBYTES]."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if \
self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse BLOCKCOMPRESSION [=] ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP (...)]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        """Parse [NO] [CONCURRENT] ISOLATED LOADING [<target option>].

        Returns None (with the cursor rewound) when the mandatory
        ISOLATED LOADING keywords are absent.
        """
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING property: kind, optional target, FOR/IN, lock type, OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locking has no named target; the others name the locked object.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parse PARTITION BY <expr>, ...; returns [] when absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a partition bound spec: IN (...), FROM (...) TO (...), or
        WITH (MODULUS n, REMAINDER m). Raises on anything else.
        """
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE/MAXVALUE are keywords here, not ordinary expressions.
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parse PARTITION OF <parent> {DEFAULT | FOR VALUES <bound spec>}."""
        if not self._match_text_seq("OF"):
            # Rewind the PARTITION token consumed by the caller's dispatch.
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse PARTITIONED BY [=] <schema or bracketed field>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse WITH [NO] DATA [AND [NO] STATISTICS]."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse CONTAINS SQL (the CONTAINS token is consumed by the caller)."""
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse MODIFIES SQL DATA (the MODIFIES token is consumed by the caller)."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        """Parse NO PRIMARY INDEX or NO SQL (the NO token is consumed by the caller)."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse ON COMMIT PRESERVE|DELETE ROWS, or a generic ON <schema> property."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse READS SQL DATA (the READS token is consumed by the caller)."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse DISTKEY (<identifier>)."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse LIKE <table> [INCLUDING|EXCLUDING <option>]...; None on a malformed option."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse [COMPOUND] SORTKEY (<id>, ...)."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse CHARACTER SET [=] <name>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse REMOTE WITH CONNECTION <table parts>."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: RETURNS TABLE [<...>], RETURNS TABLE (schema),
        or RETURNS <type>.
        """
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # TABLE<col type, ...> generic syntax.
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        """Parse DESCRIBE [<kind>] [EXTENDED|FORMATTED] <table> [<properties>]."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(("EXTENDED", "FORMATTED")) and self._prev.text.upper()
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, style=style, kind=kind, expressions=expressions
        )

    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement, including INSERT OVERWRITE DIRECTORY,
        INSERT OR <alternative>, and ON CONFLICT/DUPLICATE KEY handling.
        """
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            # Hive-style INSERT [OVERWRITE] [LOCAL] DIRECTORY '<path>'.
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = self._parse_table(schema=True) if not is_function else self._parse_function()

        # RETURNING may appear before or after the inserted data depending on dialect.
        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse KILL [CONNECTION|QUERY] <id>."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT ... or ON DUPLICATE KEY ...; None when neither is present."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            # DO UPDATE [SET] a = b, ...
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse RETURNING <expr>, ... [INTO <target>]; None when absent."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a FORMAT row-format clause (the ROW token was consumed by the caller)."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ROW FORMAT SERDE '<class>' [WITH SERDEPROPERTIES (...)] or
        ROW FORMAT DELIMITED with its optional terminator sub-clauses.

        Args:
            match_row: whether the leading ROW FORMAT pair must still be matched.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_properties()
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

def _parse_load(self) -> exp.LoadData | exp.Command: 2343 if self._match_text_seq("DATA"): 2344 local = self._match_text_seq("LOCAL") 2345 self._match_text_seq("INPATH") 2346 inpath = self._parse_string() 2347 overwrite = self._match(TokenType.OVERWRITE) 2348 self._match_pair(TokenType.INTO, TokenType.TABLE) 2349 2350 return self.expression( 2351 exp.LoadData, 2352 this=self._parse_table(schema=True), 2353 local=local, 2354 overwrite=overwrite, 2355 inpath=inpath, 2356 partition=self._parse_partition(), 2357 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2358 serde=self._match_text_seq("SERDE") and self._parse_string(), 2359 ) 2360 return self._parse_as_command(self._prev) 2361 2362 def _parse_delete(self) -> exp.Delete: 2363 # This handles MySQL's "Multiple-Table Syntax" 2364 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2365 tables = None 2366 comments = self._prev_comments 2367 if not self._match(TokenType.FROM, advance=False): 2368 tables = self._parse_csv(self._parse_table) or None 2369 2370 returning = self._parse_returning() 2371 2372 return self.expression( 2373 exp.Delete, 2374 comments=comments, 2375 tables=tables, 2376 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2377 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2378 where=self._parse_where(), 2379 returning=returning or self._parse_returning(), 2380 limit=self._parse_limit(), 2381 ) 2382 2383 def _parse_update(self) -> exp.Update: 2384 comments = self._prev_comments 2385 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2386 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2387 returning = self._parse_returning() 2388 return self.expression( 2389 exp.Update, 2390 comments=comments, 2391 **{ # type: ignore 2392 "this": this, 2393 "expressions": expressions, 2394 "from": self._parse_from(joins=True), 2395 "where": self._parse_where(), 2396 "returning": returning 
or self._parse_returning(), 2397 "order": self._parse_order(), 2398 "limit": self._parse_limit(), 2399 }, 2400 ) 2401 2402 def _parse_uncache(self) -> exp.Uncache: 2403 if not self._match(TokenType.TABLE): 2404 self.raise_error("Expecting TABLE after UNCACHE") 2405 2406 return self.expression( 2407 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2408 ) 2409 2410 def _parse_cache(self) -> exp.Cache: 2411 lazy = self._match_text_seq("LAZY") 2412 self._match(TokenType.TABLE) 2413 table = self._parse_table(schema=True) 2414 2415 options = [] 2416 if self._match_text_seq("OPTIONS"): 2417 self._match_l_paren() 2418 k = self._parse_string() 2419 self._match(TokenType.EQ) 2420 v = self._parse_string() 2421 options = [k, v] 2422 self._match_r_paren() 2423 2424 self._match(TokenType.ALIAS) 2425 return self.expression( 2426 exp.Cache, 2427 this=table, 2428 lazy=lazy, 2429 options=options, 2430 expression=self._parse_select(nested=True), 2431 ) 2432 2433 def _parse_partition(self) -> t.Optional[exp.Partition]: 2434 if not self._match(TokenType.PARTITION): 2435 return None 2436 2437 return self.expression( 2438 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2439 ) 2440 2441 def _parse_value(self) -> exp.Tuple: 2442 if self._match(TokenType.L_PAREN): 2443 expressions = self._parse_csv(self._parse_expression) 2444 self._match_r_paren() 2445 return self.expression(exp.Tuple, expressions=expressions) 2446 2447 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
        return self.expression(exp.Tuple, expressions=[self._parse_expression()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list."""
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: optional CTEs, an optional leading FROM
        (duckdb), a SELECT body, a parenthesized subquery/table, or VALUES.

        Args:
            nested: whether this call parses a parenthesized (nested) query.
            table: whether a table (rather than a nested select) is expected.
            parse_subquery_alias: whether to parse an alias after a subquery.
            parse_set_operation: whether to attach trailing set operations.
        """
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            # SELECT AS STRUCT / SELECT AS VALUE (BigQuery-style).
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limits appear before the projection list.
            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            # A bare leading FROM with no SELECT: implicit SELECT *.
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH clause containing one or more CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # Tolerate either a comma or a repeated WITH between CTEs.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse one CTE: <alias> AS [NOT MATERIALIZED | MATERIALIZED] (<stmt>)."""
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        if self._match_text_seq("NOT",
"MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse [AS] <alias> [(col, ...)]; None when neither alias nor columns match."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Rewind the open paren if no column list actually followed it.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap a parsed query in a Subquery with optional pivots and alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite comma-joined references to earlier sources as explicit UNNESTs.

        Only unconditional joins (no ON clause) whose first part names a known
        source are rewritten.
        """
        from sqlglot.optimizer.normalize_identifiers import (
            normalize_identifiers as _norm,
        )

        # Track the normalized names of sources seen so far, starting with FROM.
        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing modifiers (joins, laterals, WHERE/GROUP/LIMIT/... via
        QUERY_MODIFIER_PARSERS) to a query or table expression.
        """
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # A LIMIT may carry an embedded OFFSET; hoist it out.
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                # Move LIMIT ... BY expressions onto the Offset node.
                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

            if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args:
                this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint comment /*+ ... */ into an exp.Hint."""
        if self._match(TokenType.HINT):
            hints = []
            # Gather comma-separated hint entries until no more parse.
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_function() or self._parse_var(upper=True)
                ),
                [],
            ):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse SELECT ... INTO [TEMPORARY|UNLOGGED] [TABLE] <table>."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause; None when the FROM token is absent (unless skipped)."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is captured verbatim by scanning to the balancing paren.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL ..., CROSS APPLY ... or OUTER APPLY ...; None otherwise."""
        # cross_apply: True for CROSS APPLY, False for OUTER APPLY, None for LATERAL.
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # No subquery: fall back to an UNNEST, a function call, or an identifier.
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while \
self._match(TokenType.DOT): 2842 this = exp.Dot( 2843 this=this, 2844 expression=self._parse_function() or self._parse_id_var(any_token=False), 2845 ) 2846 2847 if view: 2848 table = self._parse_id_var(any_token=False) 2849 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2850 table_alias: t.Optional[exp.TableAlias] = self.expression( 2851 exp.TableAlias, this=table, columns=columns 2852 ) 2853 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2854 # We move the alias from the lateral's child node to the lateral itself 2855 table_alias = this.args["alias"].pop() 2856 else: 2857 table_alias = self._parse_table_alias() 2858 2859 return self.expression( 2860 exp.Lateral, 2861 this=this, 2862 view=view, 2863 outer=outer, 2864 alias=table_alias, 2865 cross_apply=cross_apply, 2866 ) 2867 2868 def _parse_join_parts( 2869 self, 2870 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2871 return ( 2872 self._match_set(self.JOIN_METHODS) and self._prev, 2873 self._match_set(self.JOIN_SIDES) and self._prev, 2874 self._match_set(self.JOIN_KINDS) and self._prev, 2875 ) 2876 2877 def _parse_join( 2878 self, skip_join_token: bool = False, parse_bracket: bool = False 2879 ) -> t.Optional[exp.Join]: 2880 if self._match(TokenType.COMMA): 2881 return self.expression(exp.Join, this=self._parse_table()) 2882 2883 index = self._index 2884 method, side, kind = self._parse_join_parts() 2885 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2886 join = self._match(TokenType.JOIN) 2887 2888 if not skip_join_token and not join: 2889 self._retreat(index) 2890 kind = None 2891 method = None 2892 side = None 2893 2894 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2895 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2896 2897 if not skip_join_token and not join and not outer_apply and not cross_apply: 2898 return None 2899 2900 kwargs: t.Dict[str, t.Any] 
= {"this": self._parse_table(parse_bracket=parse_bracket)} 2901 2902 if method: 2903 kwargs["method"] = method.text 2904 if side: 2905 kwargs["side"] = side.text 2906 if kind: 2907 kwargs["kind"] = kind.text 2908 if hint: 2909 kwargs["hint"] = hint 2910 2911 if self._match(TokenType.MATCH_CONDITION): 2912 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 2913 2914 if self._match(TokenType.ON): 2915 kwargs["on"] = self._parse_conjunction() 2916 elif self._match(TokenType.USING): 2917 kwargs["using"] = self._parse_wrapped_id_vars() 2918 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 2919 kind and kind.token_type == TokenType.CROSS 2920 ): 2921 index = self._index 2922 joins: t.Optional[list] = list(self._parse_joins()) 2923 2924 if joins and self._match(TokenType.ON): 2925 kwargs["on"] = self._parse_conjunction() 2926 elif joins and self._match(TokenType.USING): 2927 kwargs["using"] = self._parse_wrapped_id_vars() 2928 else: 2929 joins = None 2930 self._retreat(index) 2931 2932 kwargs["this"].set("joins", joins if joins else None) 2933 2934 comments = [c for token in (method, side, kind) if token for c in token.comments] 2935 return self.expression(exp.Join, comments=comments, **kwargs) 2936 2937 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2938 this = self._parse_conjunction() 2939 2940 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2941 return this 2942 2943 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2944 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2945 2946 return this 2947 2948 def _parse_index_params(self) -> exp.IndexParameters: 2949 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2950 2951 if self._match(TokenType.L_PAREN, advance=False): 2952 columns = self._parse_wrapped_csv(self._parse_with_operator) 2953 else: 2954 columns = None 2955 2956 include = self._parse_wrapped_id_vars() if 
self._match_text_seq("INCLUDE") else None 2957 partition_by = self._parse_partition_by() 2958 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 2959 tablespace = ( 2960 self._parse_var(any_token=True) 2961 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 2962 else None 2963 ) 2964 where = self._parse_where() 2965 2966 return self.expression( 2967 exp.IndexParameters, 2968 using=using, 2969 columns=columns, 2970 include=include, 2971 partition_by=partition_by, 2972 where=where, 2973 with_storage=with_storage, 2974 tablespace=tablespace, 2975 ) 2976 2977 def _parse_index( 2978 self, 2979 index: t.Optional[exp.Expression] = None, 2980 ) -> t.Optional[exp.Index]: 2981 if index: 2982 unique = None 2983 primary = None 2984 amp = None 2985 2986 self._match(TokenType.ON) 2987 self._match(TokenType.TABLE) # hive 2988 table = self._parse_table_parts(schema=True) 2989 else: 2990 unique = self._match(TokenType.UNIQUE) 2991 primary = self._match_text_seq("PRIMARY") 2992 amp = self._match_text_seq("AMP") 2993 2994 if not self._match(TokenType.INDEX): 2995 return None 2996 2997 index = self._parse_id_var() 2998 table = None 2999 3000 params = self._parse_index_params() 3001 3002 return self.expression( 3003 exp.Index, 3004 this=index, 3005 table=table, 3006 unique=unique, 3007 primary=primary, 3008 amp=amp, 3009 params=params, 3010 ) 3011 3012 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3013 hints: t.List[exp.Expression] = [] 3014 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3015 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3016 hints.append( 3017 self.expression( 3018 exp.WithTableHint, 3019 expressions=self._parse_csv( 3020 lambda: self._parse_function() or self._parse_var(any_token=True) 3021 ), 3022 ) 3023 ) 3024 self._match_r_paren() 3025 else: 3026 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3027 while 
self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3028 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3029 3030 self._match_texts(("INDEX", "KEY")) 3031 if self._match(TokenType.FOR): 3032 hint.set("target", self._advance_any() and self._prev.text.upper()) 3033 3034 hint.set("expressions", self._parse_wrapped_id_vars()) 3035 hints.append(hint) 3036 3037 return hints or None 3038 3039 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3040 return ( 3041 (not schema and self._parse_function(optional_parens=False)) 3042 or self._parse_id_var(any_token=False) 3043 or self._parse_string_as_identifier() 3044 or self._parse_placeholder() 3045 ) 3046 3047 def _parse_table_parts( 3048 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3049 ) -> exp.Table: 3050 catalog = None 3051 db = None 3052 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3053 3054 while self._match(TokenType.DOT): 3055 if catalog: 3056 # This allows nesting the table in arbitrarily many dot expressions if needed 3057 table = self.expression( 3058 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3059 ) 3060 else: 3061 catalog = db 3062 db = table 3063 # "" used for tsql FROM a..b case 3064 table = self._parse_table_part(schema=schema) or "" 3065 3066 if ( 3067 wildcard 3068 and self._is_connected() 3069 and (isinstance(table, exp.Identifier) or not table) 3070 and self._match(TokenType.STAR) 3071 ): 3072 if isinstance(table, exp.Identifier): 3073 table.args["this"] += "*" 3074 else: 3075 table = exp.Identifier(this="*") 3076 3077 if is_db_reference: 3078 catalog = db 3079 db = table 3080 table = None 3081 3082 if not table and not is_db_reference: 3083 self.raise_error(f"Expected table name but got {self._curr}") 3084 if not db and is_db_reference: 3085 self.raise_error(f"Expected database name but got {self._curr}") 3086 3087 return self.expression( 3088 exp.Table, this=table, db=db, 
catalog=catalog, pivots=self._parse_pivots() 3089 ) 3090 3091 def _parse_table( 3092 self, 3093 schema: bool = False, 3094 joins: bool = False, 3095 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3096 parse_bracket: bool = False, 3097 is_db_reference: bool = False, 3098 ) -> t.Optional[exp.Expression]: 3099 lateral = self._parse_lateral() 3100 if lateral: 3101 return lateral 3102 3103 unnest = self._parse_unnest() 3104 if unnest: 3105 return unnest 3106 3107 values = self._parse_derived_table_values() 3108 if values: 3109 return values 3110 3111 subquery = self._parse_select(table=True) 3112 if subquery: 3113 if not subquery.args.get("pivots"): 3114 subquery.set("pivots", self._parse_pivots()) 3115 return subquery 3116 3117 bracket = parse_bracket and self._parse_bracket(None) 3118 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3119 3120 only = self._match(TokenType.ONLY) 3121 3122 this = t.cast( 3123 exp.Expression, 3124 bracket 3125 or self._parse_bracket( 3126 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3127 ), 3128 ) 3129 3130 if only: 3131 this.set("only", only) 3132 3133 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3134 self._match_text_seq("*") 3135 3136 if schema: 3137 return self._parse_schema(this=this) 3138 3139 version = self._parse_version() 3140 3141 if version: 3142 this.set("version", version) 3143 3144 if self.dialect.ALIAS_POST_TABLESAMPLE: 3145 table_sample = self._parse_table_sample() 3146 3147 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3148 if alias: 3149 this.set("alias", alias) 3150 3151 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3152 return self.expression( 3153 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3154 ) 3155 3156 this.set("hints", self._parse_table_hints()) 3157 3158 if not this.args.get("pivots"): 3159 this.set("pivots", 
self._parse_pivots()) 3160 3161 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3162 table_sample = self._parse_table_sample() 3163 3164 if table_sample: 3165 table_sample.set("this", this) 3166 this = table_sample 3167 3168 if joins: 3169 for join in self._parse_joins(): 3170 this.append("joins", join) 3171 3172 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3173 this.set("ordinality", True) 3174 this.set("alias", self._parse_table_alias()) 3175 3176 return this 3177 3178 def _parse_version(self) -> t.Optional[exp.Version]: 3179 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3180 this = "TIMESTAMP" 3181 elif self._match(TokenType.VERSION_SNAPSHOT): 3182 this = "VERSION" 3183 else: 3184 return None 3185 3186 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3187 kind = self._prev.text.upper() 3188 start = self._parse_bitwise() 3189 self._match_texts(("TO", "AND")) 3190 end = self._parse_bitwise() 3191 expression: t.Optional[exp.Expression] = self.expression( 3192 exp.Tuple, expressions=[start, end] 3193 ) 3194 elif self._match_text_seq("CONTAINED", "IN"): 3195 kind = "CONTAINED IN" 3196 expression = self.expression( 3197 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3198 ) 3199 elif self._match(TokenType.ALL): 3200 kind = "ALL" 3201 expression = None 3202 else: 3203 self._match_text_seq("AS", "OF") 3204 kind = "AS OF" 3205 expression = self._parse_type() 3206 3207 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3208 3209 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3210 if not self._match(TokenType.UNNEST): 3211 return None 3212 3213 expressions = self._parse_wrapped_csv(self._parse_equality) 3214 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3215 3216 alias = self._parse_table_alias() if with_alias else None 3217 3218 if alias: 3219 if self.dialect.UNNEST_COLUMN_ONLY: 3220 if alias.args.get("columns"): 3221 self.raise_error("Unexpected extra column 
alias in unnest.") 3222 3223 alias.set("columns", [alias.this]) 3224 alias.set("this", None) 3225 3226 columns = alias.args.get("columns") or [] 3227 if offset and len(expressions) < len(columns): 3228 offset = columns.pop() 3229 3230 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3231 self._match(TokenType.ALIAS) 3232 offset = self._parse_id_var( 3233 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3234 ) or exp.to_identifier("offset") 3235 3236 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3237 3238 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3239 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3240 if not is_derived and not self._match_text_seq("VALUES"): 3241 return None 3242 3243 expressions = self._parse_csv(self._parse_value) 3244 alias = self._parse_table_alias() 3245 3246 if is_derived: 3247 self._match_r_paren() 3248 3249 return self.expression( 3250 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3251 ) 3252 3253 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3254 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3255 as_modifier and self._match_text_seq("USING", "SAMPLE") 3256 ): 3257 return None 3258 3259 bucket_numerator = None 3260 bucket_denominator = None 3261 bucket_field = None 3262 percent = None 3263 size = None 3264 seed = None 3265 3266 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3267 matched_l_paren = self._match(TokenType.L_PAREN) 3268 3269 if self.TABLESAMPLE_CSV: 3270 num = None 3271 expressions = self._parse_csv(self._parse_primary) 3272 else: 3273 expressions = None 3274 num = ( 3275 self._parse_factor() 3276 if self._match(TokenType.NUMBER, advance=False) 3277 else self._parse_primary() or self._parse_placeholder() 3278 ) 3279 3280 if self._match_text_seq("BUCKET"): 3281 bucket_numerator = self._parse_number() 3282 
self._match_text_seq("OUT", "OF") 3283 bucket_denominator = bucket_denominator = self._parse_number() 3284 self._match(TokenType.ON) 3285 bucket_field = self._parse_field() 3286 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3287 percent = num 3288 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3289 size = num 3290 else: 3291 percent = num 3292 3293 if matched_l_paren: 3294 self._match_r_paren() 3295 3296 if self._match(TokenType.L_PAREN): 3297 method = self._parse_var(upper=True) 3298 seed = self._match(TokenType.COMMA) and self._parse_number() 3299 self._match_r_paren() 3300 elif self._match_texts(("SEED", "REPEATABLE")): 3301 seed = self._parse_wrapped(self._parse_number) 3302 3303 return self.expression( 3304 exp.TableSample, 3305 expressions=expressions, 3306 method=method, 3307 bucket_numerator=bucket_numerator, 3308 bucket_denominator=bucket_denominator, 3309 bucket_field=bucket_field, 3310 percent=percent, 3311 size=size, 3312 seed=seed, 3313 ) 3314 3315 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3316 return list(iter(self._parse_pivot, None)) or None 3317 3318 def _parse_joins(self) -> t.Iterator[exp.Join]: 3319 return iter(self._parse_join, None) 3320 3321 # https://duckdb.org/docs/sql/statements/pivot 3322 def _parse_simplified_pivot(self) -> exp.Pivot: 3323 def _parse_on() -> t.Optional[exp.Expression]: 3324 this = self._parse_bitwise() 3325 return self._parse_in(this) if self._match(TokenType.IN) else this 3326 3327 this = self._parse_table() 3328 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3329 using = self._match(TokenType.USING) and self._parse_csv( 3330 lambda: self._parse_alias(self._parse_function()) 3331 ) 3332 group = self._parse_group() 3333 return self.expression( 3334 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3335 ) 3336 3337 def _parse_pivot_in(self) -> exp.In: 3338 def _parse_aliased_expression() -> 
t.Optional[exp.Expression]: 3339 this = self._parse_conjunction() 3340 3341 self._match(TokenType.ALIAS) 3342 alias = self._parse_field() 3343 if alias: 3344 return self.expression(exp.PivotAlias, this=this, alias=alias) 3345 3346 return this 3347 3348 value = self._parse_column() 3349 3350 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3351 self.raise_error("Expecting IN (") 3352 3353 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3354 3355 self._match_r_paren() 3356 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3357 3358 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3359 index = self._index 3360 include_nulls = None 3361 3362 if self._match(TokenType.PIVOT): 3363 unpivot = False 3364 elif self._match(TokenType.UNPIVOT): 3365 unpivot = True 3366 3367 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3368 if self._match_text_seq("INCLUDE", "NULLS"): 3369 include_nulls = True 3370 elif self._match_text_seq("EXCLUDE", "NULLS"): 3371 include_nulls = False 3372 else: 3373 return None 3374 3375 expressions = [] 3376 3377 if not self._match(TokenType.L_PAREN): 3378 self._retreat(index) 3379 return None 3380 3381 if unpivot: 3382 expressions = self._parse_csv(self._parse_column) 3383 else: 3384 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3385 3386 if not expressions: 3387 self.raise_error("Failed to parse PIVOT's aggregation list") 3388 3389 if not self._match(TokenType.FOR): 3390 self.raise_error("Expecting FOR") 3391 3392 field = self._parse_pivot_in() 3393 3394 self._match_r_paren() 3395 3396 pivot = self.expression( 3397 exp.Pivot, 3398 expressions=expressions, 3399 field=field, 3400 unpivot=unpivot, 3401 include_nulls=include_nulls, 3402 ) 3403 3404 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3405 pivot.set("alias", self._parse_table_alias()) 3406 3407 if not unpivot: 3408 names = 
self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3409 3410 columns: t.List[exp.Expression] = [] 3411 for fld in pivot.args["field"].expressions: 3412 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3413 for name in names: 3414 if self.PREFIXED_PIVOT_COLUMNS: 3415 name = f"{name}_{field_name}" if name else field_name 3416 else: 3417 name = f"{field_name}_{name}" if name else field_name 3418 3419 columns.append(exp.to_identifier(name)) 3420 3421 pivot.set("columns", columns) 3422 3423 return pivot 3424 3425 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3426 return [agg.alias for agg in aggregations] 3427 3428 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3429 if not skip_where_token and not self._match(TokenType.PREWHERE): 3430 return None 3431 3432 return self.expression( 3433 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3434 ) 3435 3436 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3437 if not skip_where_token and not self._match(TokenType.WHERE): 3438 return None 3439 3440 return self.expression( 3441 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3442 ) 3443 3444 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3445 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3446 return None 3447 3448 elements = defaultdict(list) 3449 3450 if self._match(TokenType.ALL): 3451 return self.expression(exp.Group, all=True) 3452 3453 while True: 3454 expressions = self._parse_csv(self._parse_conjunction) 3455 if expressions: 3456 elements["expressions"].extend(expressions) 3457 3458 grouping_sets = self._parse_grouping_sets() 3459 if grouping_sets: 3460 elements["grouping_sets"].extend(grouping_sets) 3461 3462 rollup = None 3463 cube = None 3464 totals = None 3465 3466 index = self._index 3467 with_ = 
self._match(TokenType.WITH) 3468 if self._match(TokenType.ROLLUP): 3469 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3470 elements["rollup"].extend(ensure_list(rollup)) 3471 3472 if self._match(TokenType.CUBE): 3473 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3474 elements["cube"].extend(ensure_list(cube)) 3475 3476 if self._match_text_seq("TOTALS"): 3477 totals = True 3478 elements["totals"] = True # type: ignore 3479 3480 if not (grouping_sets or rollup or cube or totals): 3481 if with_: 3482 self._retreat(index) 3483 break 3484 3485 return self.expression(exp.Group, **elements) # type: ignore 3486 3487 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3488 if not self._match(TokenType.GROUPING_SETS): 3489 return None 3490 3491 return self._parse_wrapped_csv(self._parse_grouping_set) 3492 3493 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3494 if self._match(TokenType.L_PAREN): 3495 grouping_set = self._parse_csv(self._parse_column) 3496 self._match_r_paren() 3497 return self.expression(exp.Tuple, expressions=grouping_set) 3498 3499 return self._parse_column() 3500 3501 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3502 if not skip_having_token and not self._match(TokenType.HAVING): 3503 return None 3504 return self.expression(exp.Having, this=self._parse_conjunction()) 3505 3506 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3507 if not self._match(TokenType.QUALIFY): 3508 return None 3509 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3510 3511 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3512 if skip_start_token: 3513 start = None 3514 elif self._match(TokenType.START_WITH): 3515 start = self._parse_conjunction() 3516 else: 3517 return None 3518 3519 self._match(TokenType.CONNECT_BY) 3520 nocycle = self._match_text_seq("NOCYCLE") 3521 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: 
self.expression( 3522 exp.Prior, this=self._parse_bitwise() 3523 ) 3524 connect = self._parse_conjunction() 3525 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3526 3527 if not start and self._match(TokenType.START_WITH): 3528 start = self._parse_conjunction() 3529 3530 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3531 3532 def _parse_name_as_expression(self) -> exp.Alias: 3533 return self.expression( 3534 exp.Alias, 3535 alias=self._parse_id_var(any_token=True), 3536 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3537 ) 3538 3539 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3540 if self._match_text_seq("INTERPOLATE"): 3541 return self._parse_wrapped_csv(self._parse_name_as_expression) 3542 return None 3543 3544 def _parse_order( 3545 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3546 ) -> t.Optional[exp.Expression]: 3547 siblings = None 3548 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3549 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3550 return this 3551 3552 siblings = True 3553 3554 return self.expression( 3555 exp.Order, 3556 this=this, 3557 expressions=self._parse_csv(self._parse_ordered), 3558 interpolate=self._parse_interpolate(), 3559 siblings=siblings, 3560 ) 3561 3562 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3563 if not self._match(token): 3564 return None 3565 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3566 3567 def _parse_ordered( 3568 self, parse_method: t.Optional[t.Callable] = None 3569 ) -> t.Optional[exp.Ordered]: 3570 this = parse_method() if parse_method else self._parse_conjunction() 3571 if not this: 3572 return None 3573 3574 asc = self._match(TokenType.ASC) 3575 desc = self._match(TokenType.DESC) or (asc and False) 3576 3577 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3578 is_nulls_last = self._match_text_seq("NULLS", 
"LAST") 3579 3580 nulls_first = is_nulls_first or False 3581 explicitly_null_ordered = is_nulls_first or is_nulls_last 3582 3583 if ( 3584 not explicitly_null_ordered 3585 and ( 3586 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3587 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3588 ) 3589 and self.dialect.NULL_ORDERING != "nulls_are_last" 3590 ): 3591 nulls_first = True 3592 3593 if self._match_text_seq("WITH", "FILL"): 3594 with_fill = self.expression( 3595 exp.WithFill, 3596 **{ # type: ignore 3597 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3598 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3599 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3600 }, 3601 ) 3602 else: 3603 with_fill = None 3604 3605 return self.expression( 3606 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3607 ) 3608 3609 def _parse_limit( 3610 self, 3611 this: t.Optional[exp.Expression] = None, 3612 top: bool = False, 3613 skip_limit_token: bool = False, 3614 ) -> t.Optional[exp.Expression]: 3615 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3616 comments = self._prev_comments 3617 if top: 3618 limit_paren = self._match(TokenType.L_PAREN) 3619 expression = self._parse_term() if limit_paren else self._parse_number() 3620 3621 if limit_paren: 3622 self._match_r_paren() 3623 else: 3624 expression = self._parse_term() 3625 3626 if self._match(TokenType.COMMA): 3627 offset = expression 3628 expression = self._parse_term() 3629 else: 3630 offset = None 3631 3632 limit_exp = self.expression( 3633 exp.Limit, 3634 this=this, 3635 expression=expression, 3636 offset=offset, 3637 comments=comments, 3638 expressions=self._parse_limit_by(), 3639 ) 3640 3641 return limit_exp 3642 3643 if self._match(TokenType.FETCH): 3644 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3645 direction = self._prev.text.upper() if direction else "FIRST" 3646 3647 
            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause (with optional ROW/ROWS and BY expressions)."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse the BY expressions of a ClickHouse LIMIT ... BY clause."""
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse row-locking clauses: FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait: True = NOWAIT, False = SKIP LOCKED, expression = WAIT <n>
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold trailing UNION/EXCEPT/INTERSECT operands onto `this`, left-associatively."""
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            # DISTINCT is the default unless ALL is given
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            expression = this.expression

            if expression:
                # Lift trailing modifiers from the last operand onto the Union itself
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)

                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full (possibly aliased) scalar expression."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR combinations."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse =, <>, etc."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse <, >, <=, >=, etc."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range-style predicates: BETWEEN/IN/LIKE..., ISNULL/NOTNULL, IS."""
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of IS [NOT] NULL/TRUE/FALSE/DISTINCT FROM."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # Not actually an IS predicate - rewind to before IS
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of an IN predicate."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
else: 3821 this = self.expression(exp.In, this=this, field=self._parse_field()) 3822 3823 return this 3824 3825 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3826 low = self._parse_bitwise() 3827 self._match(TokenType.AND) 3828 high = self._parse_bitwise() 3829 return self.expression(exp.Between, this=this, low=low, high=high) 3830 3831 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3832 if not self._match(TokenType.ESCAPE): 3833 return this 3834 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3835 3836 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3837 index = self._index 3838 3839 if not self._match(TokenType.INTERVAL) and match_interval: 3840 return None 3841 3842 if self._match(TokenType.STRING, advance=False): 3843 this = self._parse_primary() 3844 else: 3845 this = self._parse_term() 3846 3847 if not this or ( 3848 isinstance(this, exp.Column) 3849 and not this.table 3850 and not this.this.quoted 3851 and this.name.upper() == "IS" 3852 ): 3853 self._retreat(index) 3854 return None 3855 3856 unit = self._parse_function() or ( 3857 not self._match(TokenType.ALIAS, advance=False) 3858 and self._parse_var(any_token=True, upper=True) 3859 ) 3860 3861 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3862 # each INTERVAL expression into this canonical form so it's easy to transpile 3863 if this and this.is_number: 3864 this = exp.Literal.string(this.name) 3865 elif this and this.is_string: 3866 parts = this.name.split() 3867 3868 if len(parts) == 2: 3869 if unit: 3870 # This is not actually a unit, it's something else (e.g. 
a "window side") 3871 unit = None 3872 self._retreat(self._index - 1) 3873 3874 this = exp.Literal.string(parts[0]) 3875 unit = self.expression(exp.Var, this=parts[1].upper()) 3876 3877 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 3878 unit = self.expression( 3879 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 3880 ) 3881 3882 return self.expression(exp.Interval, this=this, unit=unit) 3883 3884 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3885 this = self._parse_term() 3886 3887 while True: 3888 if self._match_set(self.BITWISE): 3889 this = self.expression( 3890 self.BITWISE[self._prev.token_type], 3891 this=this, 3892 expression=self._parse_term(), 3893 ) 3894 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3895 this = self.expression( 3896 exp.DPipe, 3897 this=this, 3898 expression=self._parse_term(), 3899 safe=not self.dialect.STRICT_STRING_CONCAT, 3900 ) 3901 elif self._match(TokenType.DQMARK): 3902 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3903 elif self._match_pair(TokenType.LT, TokenType.LT): 3904 this = self.expression( 3905 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3906 ) 3907 elif self._match_pair(TokenType.GT, TokenType.GT): 3908 this = self.expression( 3909 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3910 ) 3911 else: 3912 break 3913 3914 return this 3915 3916 def _parse_term(self) -> t.Optional[exp.Expression]: 3917 return self._parse_tokens(self._parse_factor, self.TERM) 3918 3919 def _parse_factor(self) -> t.Optional[exp.Expression]: 3920 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3921 this = parse_method() 3922 3923 while self._match_set(self.FACTOR): 3924 this = self.expression( 3925 self.FACTOR[self._prev.token_type], 3926 this=this, 3927 comments=self._prev_comments, 3928 expression=parse_method(), 3929 ) 3930 if isinstance(this, exp.Div): 3931 
this.args["typed"] = self.dialect.TYPED_DIVISION 3932 this.args["safe"] = self.dialect.SAFE_DIVISION 3933 3934 return this 3935 3936 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3937 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3938 3939 def _parse_unary(self) -> t.Optional[exp.Expression]: 3940 if self._match_set(self.UNARY_PARSERS): 3941 return self.UNARY_PARSERS[self._prev.token_type](self) 3942 return self._parse_at_time_zone(self._parse_type()) 3943 3944 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3945 interval = parse_interval and self._parse_interval() 3946 if interval: 3947 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 3948 while True: 3949 index = self._index 3950 self._match(TokenType.PLUS) 3951 3952 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 3953 self._retreat(index) 3954 break 3955 3956 interval = self.expression( # type: ignore 3957 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 3958 ) 3959 3960 return interval 3961 3962 index = self._index 3963 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3964 this = self._parse_column() 3965 3966 if data_type: 3967 if isinstance(this, exp.Literal): 3968 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3969 if parser: 3970 return parser(self, this, data_type) 3971 return self.expression(exp.Cast, this=this, to=data_type) 3972 if not data_type.expressions: 3973 self._retreat(index) 3974 return self._parse_column() 3975 return self._parse_column_ops(data_type) 3976 3977 return this and self._parse_column_ops(this) 3978 3979 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3980 this = self._parse_type() 3981 if not this: 3982 return None 3983 3984 if isinstance(this, exp.Column) and not this.table: 3985 this = exp.var(this.name.upper()) 3986 3987 return self.expression( 3988 exp.DataTypeParam, this=this, 
    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, covering parenthesized size/precision args, nested
        `<...>` type parameters, struct/enum/aggregate types, TIMESTAMP/TIME
        time-zone variants, INTERVAL units, UNSIGNED variants, user-defined types,
        and trailing `[]` array suffixes.

        Args:
            check_func: when True, a parenthesized form that could also be a function
                call (e.g. a size arg) is only accepted if not followed by a string.
            schema: passed through to nested type parses (column-def context).
            allow_identifiers: allow a plain identifier to be re-tokenized as a type.

        Returns None (after rewinding) when no type can be parsed.
        """
        index = self._index

        # Teradata-style SYSUDTLIB.<type> prefix.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if identifier:
                # Re-tokenize the identifier: it may itself spell a type name.
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    # Collect a possibly dotted UDT name.
                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                # Aggregate types take a function or identifier, then type args.
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # A parenthesized form might actually be a function call; see check below.
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values following the nested type, e.g. ARRAY<INT>[1, 2].
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(any_token=True, upper=True)

            if self._match_text_seq("TO"):
                # INTERVAL <unit> TO <unit>, e.g. INTERVAL DAY TO SECOND.
                unit = exp.IntervalSpan(
                    this=unit, expression=self._parse_var(any_token=True, upper=True)
                )

            if unit:
                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            # Disambiguate `TYPE(args)` from a function call: a following string
            # literal means this really was a type (e.g. DATE '2020-01-01').
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] suffixes wrap the type in ARRAY, once per pair.
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one struct field: an optional name, optional colon, and a column def.

        When `type_required` is True and only a bare name was consumed (no type),
        rewind and re-parse the field as a type instead.
        """
        index = self._index
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        column_def = self._parse_column_def(this)

        if type_required and (
            (isinstance(this, exp.Column) and this.this is column_def) or this is column_def
        ):
            # _parse_column_def returned its input unchanged — no type was parsed.
            self._retreat(index)
            return self._parse_types()

        return column_def
    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column expression: a column reference plus any trailing column
        operators (dots, casts, brackets)."""
        this = self._parse_column_reference()
        return self._parse_column_ops(this) if this else self._parse_bracket(this)

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        """Parse a bare column reference, wrapping a lone Identifier in a Column.

        Also accepts VALUES as an identifier when the dialect only treats
        `VALUES (` as the VALUES clause and no paren follows.
        """
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators (from `self.COLUMN_OPERATORS`) to `this`:
        `::` casts, dotted access, and bracket subscripts."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # `expr::type` cast — the right side must be a type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func) and this:
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                # Rebuild the qualifier chain as Dot nodes so the function call
                # hangs off a plain dotted path instead of a Column.
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers one level: prior column parts become
                # table/db/catalog of the new column `field`.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal (with implicit adjacent-string
        concatenation), a leading-dot number like `.5`, or a parenthesized
        expression / tuple / subquery."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate: 'a' 'b' -> CONCAT('a', 'b').
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary expression, a function call, or an identifier."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, including the ODBC-style `{fn <function>}` wrapper."""
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func
    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Normalize key-value style argument expressions into PropertyEQ nodes,
        unwrapping Column keys into their inner identifiers."""
        transformed = []

        for e in expressions:
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    # The key should be a bare identifier, not a column reference.
                    e.this.replace(e.this.this)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one UDF parameter as a column definition (name plus optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name and, if present, its wrapped
        parameter list. Without parentheses, just the name is returned."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse an introducer (e.g. a charset prefix) applied to a following
        literal; falls back to a plain Identifier when no literal follows."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as `kind.name`."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (`(x, y) -> ...` or `x -> ...` via `self.LAMBDAS`);
        otherwise rewind and parse a DISTINCT list or a regular select/expression
        with optional aggregate modifiers (IGNORE NULLS, HAVING MAX, ORDER, LIMIT)."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda — rewind and parse as an ordinary argument expression.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/constraint definitions) attached to
        `this`, rewinding if the parens actually start a subquery/CTE."""
        index = self._index

        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse a field definition: a field name followed by its column definition."""
        return self._parse_column_def(self._parse_field(any_token=True))
    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT options: either `(start, increment)` or
        `START <n> INCREMENT <n>`. With both values present this becomes a
        generated-as-identity constraint; otherwise a plain auto-increment."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        """Parse `AUTO REFRESH <value>`; rewinds and returns None if REFRESH is absent."""
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint, with either a wrapped list of values
        or a single expression."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY | ROW | <expr>} with
        optional identity options (START WITH, INCREMENT BY, MIN/MAXVALUE, CYCLE)."""
        if self._match_text_seq("BY", "DEFAULT"):
            # `this=False` marks BY DEFAULT (vs ALWAYS) identity generation.
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expr>) — a computed expression, not identity.
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare `(start[, increment])` shorthand inside the parens.
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] <expr> column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the tail of a NOT-prefixed constraint: NULL, CASESPECIFIC, or
        FOR REPLICATION; None if nothing matches."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via CONSTRAINT <name>,
        dispatching its kind through `self.CONSTRAINT_PARSERS`."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a (schema-level) constraint: named via CONSTRAINT, or an unnamed
        constraint from `self.SCHEMA_UNNAMED_CONSTRAINTS`."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        """Collect consecutive unnamed constraints (or function-shaped ones) until
        none can be parsed."""
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints
{constraint}.") 4683 4684 return self.CONSTRAINT_PARSERS[constraint](self) 4685 4686 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4687 self._match_text_seq("KEY") 4688 return self.expression( 4689 exp.UniqueColumnConstraint, 4690 this=self._parse_schema(self._parse_id_var(any_token=False)), 4691 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4692 on_conflict=self._parse_on_conflict(), 4693 ) 4694 4695 def _parse_key_constraint_options(self) -> t.List[str]: 4696 options = [] 4697 while True: 4698 if not self._curr: 4699 break 4700 4701 if self._match(TokenType.ON): 4702 action = None 4703 on = self._advance_any() and self._prev.text 4704 4705 if self._match_text_seq("NO", "ACTION"): 4706 action = "NO ACTION" 4707 elif self._match_text_seq("CASCADE"): 4708 action = "CASCADE" 4709 elif self._match_text_seq("RESTRICT"): 4710 action = "RESTRICT" 4711 elif self._match_pair(TokenType.SET, TokenType.NULL): 4712 action = "SET NULL" 4713 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4714 action = "SET DEFAULT" 4715 else: 4716 self.raise_error("Invalid key constraint") 4717 4718 options.append(f"ON {on} {action}") 4719 elif self._match_text_seq("NOT", "ENFORCED"): 4720 options.append("NOT ENFORCED") 4721 elif self._match_text_seq("DEFERRABLE"): 4722 options.append("DEFERRABLE") 4723 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4724 options.append("INITIALLY DEFERRED") 4725 elif self._match_text_seq("NORELY"): 4726 options.append("NORELY") 4727 elif self._match_text_seq("MATCH", "FULL"): 4728 options.append("MATCH FULL") 4729 else: 4730 break 4731 4732 return options 4733 4734 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4735 if match and not self._match(TokenType.REFERENCES): 4736 return None 4737 4738 expressions = None 4739 this = self._parse_table(schema=True) 4740 options = self._parse_key_constraint_options() 4741 return self.expression(exp.Reference, this=this, 
expressions=expressions, options=options) 4742 4743 def _parse_foreign_key(self) -> exp.ForeignKey: 4744 expressions = self._parse_wrapped_id_vars() 4745 reference = self._parse_references() 4746 options = {} 4747 4748 while self._match(TokenType.ON): 4749 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4750 self.raise_error("Expected DELETE or UPDATE") 4751 4752 kind = self._prev.text.lower() 4753 4754 if self._match_text_seq("NO", "ACTION"): 4755 action = "NO ACTION" 4756 elif self._match(TokenType.SET): 4757 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4758 action = "SET " + self._prev.text.upper() 4759 else: 4760 self._advance() 4761 action = self._prev.text.upper() 4762 4763 options[kind] = action 4764 4765 return self.expression( 4766 exp.ForeignKey, 4767 expressions=expressions, 4768 reference=reference, 4769 **options, # type: ignore 4770 ) 4771 4772 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4773 return self._parse_field() 4774 4775 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4776 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4777 self._retreat(self._index - 1) 4778 return None 4779 4780 id_vars = self._parse_wrapped_id_vars() 4781 return self.expression( 4782 exp.PeriodForSystemTimeConstraint, 4783 this=seq_get(id_vars, 0), 4784 expression=seq_get(id_vars, 1), 4785 ) 4786 4787 def _parse_primary_key( 4788 self, wrapped_optional: bool = False, in_props: bool = False 4789 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4790 desc = ( 4791 self._match_set((TokenType.ASC, TokenType.DESC)) 4792 and self._prev.token_type == TokenType.DESC 4793 ) 4794 4795 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4796 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4797 4798 expressions = self._parse_wrapped_csv( 4799 self._parse_primary_key_part, optional=wrapped_optional 4800 ) 4801 options = self._parse_key_constraint_options() 
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        # NOTE(review): `is_map` is unused in this base implementation — presumably a hook
        # for dialect subclasses; confirm before relying on it.
        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse [...] or {...} after `this`: a subscript, array literal, or struct literal."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Plain subscript: normalize indexes relative to the dialect's INDEX_OFFSET.
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        # Recurse to support chained subscripts such as x[0][1].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # a : b slice syntax; returns `this` unchanged when no colon follows.
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            # Recover the case where END was swallowed as the unit of an INTERVAL
            # expression in the ELSE branch (i.e. the default parsed as `interval END`).
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, either as a function call IF(...) or as IF ... THEN ... [ELSE ...] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # A statement-initial unparenthesized IF is treated as a raw command.
                return self._parse_as_command(self._prev)

            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse VALUE FOR <column> [OVER (ORDER BY ...)]; backtracks one token on mismatch."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(<part> FROM <expr>), also accepting a comma as the separator."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the interior of CAST(<expr> AS <type> [FORMAT <fmt>]).

        `strict` selects exp.Cast over exp.TryCast; `safe` is forwarded to the node.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(x, 'type-string') variant.
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                # A FORMAT clause on a temporal target is rewritten to STR_TO_DATE /
                # STR_TO_TIME, translating the format through the dialect's time mappings.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # A bare identifier as the target type is treated as a user-defined type.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style arguments into an exp.GroupConcat."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT arguments: <expr> USING <charset>, or <expr>, <type>."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: match on equality, or on both sides being NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        # A trailing unpaired argument becomes the CASE default.
        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse a [KEY] <key> <sep> [VALUE] <value> pair, as used in JSON_OBJECT."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Wraps `this` in exp.FormatJson when it is followed by FORMAT JSON.
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        """Parse JSON_OBJECT / JSON_OBJECTAGG arguments and their trailing clauses."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        # WITH/WITHOUT UNIQUE [KEYS]
        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a COLUMNS (<column defs>) schema clause, as used by JSON_TABLE."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse JSON_TABLE arguments: source, path, error/empty handling, and schema."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MATCH (<columns>) AGAINST (<string> [modifier]) full-text syntax."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One column spec in the WITH (...) clause: name, type, path, AS JSON.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION-style arguments; `haystack_first` flips the comma-form argument order."""
        args = self._parse_csv(self._parse_bitwise)

        # POSITION(<substr> IN <haystack>) form.
        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse PREDICT arguments: MODEL <table>, TABLE <table> [, <params>]."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        # The hint name is upper-cased; its arguments are parsed as tables.
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(<pattern> FROM <target>) lists the pattern first; swap to normalize.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        # WINDOW <name> AS (<spec>) [, ...]
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls when the corresponding keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # HAVING MAX <column> / HAVING MIN <column>; `max` is False only for MIN.
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-function modifiers (FILTER, WITHIN GROUP, OVER ...) applied to `this`."""
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Move the IGNORE/RESPECT NULLS wrapper from inside the aggregate to around it.
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments:
            func.comments = None  # type: ignore

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> form: no parenthesized spec, only a named window.
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one window-frame bound into a {"value", "side"} dict."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an [AS] alias (or parenthesized alias list) for `this`.

        When `explicit` is True, an alias is only recognized after the AS keyword.
        """
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.comments
                column.comments = None

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like name, falling back to `tokens` (or ID_VAR_TOKENS)."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        # Dispatches to a registered string parser; otherwise tries a placeholder.
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        # A string literal used where a name is expected becomes a quoted identifier.
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any of `tokens`, or — with `any_token` — any non-reserved token)."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        # Consume and return the current token unless it is reserved (or None).
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter, optionally brace-wrapped and with a colon-separated second part."""
        self._match(TokenType.L_BRACE)
        this = self._parse_identifier() or self._parse_primary_or_var()
        expression = self._match(TokenType.COLON) and (
            self._parse_identifier() or self._parse_primary_or_var()
        )
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The matched token did not yield a placeholder — put it back.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse an EXCEPT column list: wrapped (EXCEPT (a, b)) or a single column."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a REPLACE expression list: wrapped or a single expression."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list using `parse_method`, dropping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach comments that trailed the separator to the previous item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a run of binary operators from `expressions` over `parse_method` operands."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; parentheses are required unless `optional`."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse a transaction statement: optional kind, TRANSACTION/WORK keyword, and modes."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            # Each mode is a run of consecutive VAR tokens joined by spaces.
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse the remainder of COMMIT or ROLLBACK (the leading keyword is already consumed)."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            # AND [NO] CHAIN
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST | AFTER <column>]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            # Default the drop kind to COLUMN when none was parsed.
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the payload of ALTER TABLE ... ADD: constraints or column definitions."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            # A single ADD keyword may introduce a whole (optionally wrapped) column list.
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse ALTER [COLUMN] <col> {DROP DEFAULT | SET DEFAULT | COMMENT | [SET DATA] TYPE}."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())

        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse the payload of ALTER TABLE ... DROP: partitions or columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        """Parse RENAME COLUMN <old> TO <new>, or RENAME [TO] <new table>."""
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse ALTER TABLE; unsupported forms fall back to a raw exp.Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            options = self._parse_csv(self._parse_property)

            # Only build an AlterTable node when the whole statement was consumed.
            if not self._curr and actions:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE [INTO] <target> USING <source> ON <condition> WHEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse WHEN [NOT] MATCHED [BY TARGET | BY SOURCE] [AND <cond>] THEN <action> clauses."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var,
this=self._prev.text) 5865 else: 5866 then = None 5867 5868 whens.append( 5869 self.expression( 5870 exp.When, 5871 matched=matched, 5872 source=source, 5873 condition=condition, 5874 then=then, 5875 ) 5876 ) 5877 return whens 5878 5879 def _parse_show(self) -> t.Optional[exp.Expression]: 5880 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5881 if parser: 5882 return parser(self) 5883 return self._parse_as_command(self._prev) 5884 5885 def _parse_set_item_assignment( 5886 self, kind: t.Optional[str] = None 5887 ) -> t.Optional[exp.Expression]: 5888 index = self._index 5889 5890 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5891 return self._parse_set_transaction(global_=kind == "GLOBAL") 5892 5893 left = self._parse_primary() or self._parse_id_var() 5894 assignment_delimiter = self._match_texts(("=", "TO")) 5895 5896 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5897 self._retreat(index) 5898 return None 5899 5900 right = self._parse_statement() or self._parse_id_var() 5901 this = self.expression(exp.EQ, this=left, expression=right) 5902 5903 return self.expression(exp.SetItem, this=this, kind=kind) 5904 5905 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5906 self._match_text_seq("TRANSACTION") 5907 characteristics = self._parse_csv( 5908 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5909 ) 5910 return self.expression( 5911 exp.SetItem, 5912 expressions=characteristics, 5913 kind="TRANSACTION", 5914 **{"global": global_}, # type: ignore 5915 ) 5916 5917 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5918 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5919 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5920 5921 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5922 index = self._index 5923 set_ = self.expression( 5924 exp.Set, 
expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5925 ) 5926 5927 if self._curr: 5928 self._retreat(index) 5929 return self._parse_as_command(self._prev) 5930 5931 return set_ 5932 5933 def _parse_var_from_options( 5934 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 5935 ) -> t.Optional[exp.Var]: 5936 start = self._curr 5937 if not start: 5938 return None 5939 5940 option = start.text.upper() 5941 continuations = options.get(option) 5942 5943 index = self._index 5944 self._advance() 5945 for keywords in continuations or []: 5946 if isinstance(keywords, str): 5947 keywords = (keywords,) 5948 5949 if self._match_text_seq(*keywords): 5950 option = f"{option} {' '.join(keywords)}" 5951 break 5952 else: 5953 if continuations or continuations is None: 5954 if raise_unmatched: 5955 self.raise_error(f"Unknown option {option}") 5956 5957 self._retreat(index) 5958 return None 5959 5960 return exp.var(option) 5961 5962 def _parse_as_command(self, start: Token) -> exp.Command: 5963 while self._curr: 5964 self._advance() 5965 text = self._find_sql(start, self._prev) 5966 size = len(start.text) 5967 self._warn_unsupported() 5968 return exp.Command(this=text[:size], expression=text[size:]) 5969 5970 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5971 settings = [] 5972 5973 self._match_l_paren() 5974 kind = self._parse_id_var() 5975 5976 if self._match(TokenType.L_PAREN): 5977 while True: 5978 key = self._parse_id_var() 5979 value = self._parse_primary() 5980 5981 if not key and value is None: 5982 break 5983 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5984 self._match(TokenType.R_PAREN) 5985 5986 self._match_r_paren() 5987 5988 return self.expression( 5989 exp.DictProperty, 5990 this=this, 5991 kind=kind.this if kind else None, 5992 settings=settings, 5993 ) 5994 5995 def _parse_dict_range(self, this: str) -> exp.DictRange: 5996 self._match_l_paren() 5997 has_min = self._match_text_seq("MIN") 5998 
if has_min: 5999 min = self._parse_var() or self._parse_primary() 6000 self._match_text_seq("MAX") 6001 max = self._parse_var() or self._parse_primary() 6002 else: 6003 max = self._parse_var() or self._parse_primary() 6004 min = exp.Literal.number(0) 6005 self._match_r_paren() 6006 return self.expression(exp.DictRange, this=this, min=min, max=max) 6007 6008 def _parse_comprehension( 6009 self, this: t.Optional[exp.Expression] 6010 ) -> t.Optional[exp.Comprehension]: 6011 index = self._index 6012 expression = self._parse_column() 6013 if not self._match(TokenType.IN): 6014 self._retreat(index - 1) 6015 return None 6016 iterator = self._parse_column() 6017 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6018 return self.expression( 6019 exp.Comprehension, 6020 this=this, 6021 expression=expression, 6022 iterator=iterator, 6023 condition=condition, 6024 ) 6025 6026 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6027 if self._match(TokenType.HEREDOC_STRING): 6028 return self.expression(exp.Heredoc, this=self._prev.text) 6029 6030 if not self._match_text_seq("$"): 6031 return None 6032 6033 tags = ["$"] 6034 tag_text = None 6035 6036 if self._is_connected(): 6037 self._advance() 6038 tags.append(self._prev.text.upper()) 6039 else: 6040 self.raise_error("No closing $ found") 6041 6042 if tags[-1] != "$": 6043 if self._is_connected() and self._match_text_seq("$"): 6044 tag_text = tags[-1] 6045 tags.append("$") 6046 else: 6047 self.raise_error("No closing $ found") 6048 6049 heredoc_start = self._curr 6050 6051 while self._curr: 6052 if self._match_text_seq(*tags, advance=False): 6053 this = self._find_sql(heredoc_start, self._prev) 6054 self._advance(len(tags)) 6055 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6056 6057 self._advance() 6058 6059 self.raise_error(f"No closing {''.join(tags)} found") 6060 return None 6061 6062 def _find_parser( 6063 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6064 ) -> 
t.Optional[t.Callable]: 6065 if not self._curr: 6066 return None 6067 6068 index = self._index 6069 this = [] 6070 while True: 6071 # The current token might be multiple words 6072 curr = self._curr.text.upper() 6073 key = curr.split(" ") 6074 this.append(curr) 6075 6076 self._advance() 6077 result, trie = in_trie(trie, key) 6078 if result == TrieResult.FAILED: 6079 break 6080 6081 if result == TrieResult.EXISTS: 6082 subparser = parsers[" ".join(this)] 6083 return subparser 6084 6085 self._retreat(index) 6086 return None 6087 6088 def _match(self, token_type, advance=True, expression=None): 6089 if not self._curr: 6090 return None 6091 6092 if self._curr.token_type == token_type: 6093 if advance: 6094 self._advance() 6095 self._add_comments(expression) 6096 return True 6097 6098 return None 6099 6100 def _match_set(self, types, advance=True): 6101 if not self._curr: 6102 return None 6103 6104 if self._curr.token_type in types: 6105 if advance: 6106 self._advance() 6107 return True 6108 6109 return None 6110 6111 def _match_pair(self, token_type_a, token_type_b, advance=True): 6112 if not self._curr or not self._next: 6113 return None 6114 6115 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6116 if advance: 6117 self._advance(2) 6118 return True 6119 6120 return None 6121 6122 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6123 if not self._match(TokenType.L_PAREN, expression=expression): 6124 self.raise_error("Expecting (") 6125 6126 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6127 if not self._match(TokenType.R_PAREN, expression=expression): 6128 self.raise_error("Expecting )") 6129 6130 def _match_texts(self, texts, advance=True): 6131 if self._curr and self._curr.text.upper() in texts: 6132 if advance: 6133 self._advance() 6134 return True 6135 return None 6136 6137 def _match_text_seq(self, *texts, advance=True): 6138 index = self._index 6139 for text in 
texts: 6140 if self._curr and self._curr.text.upper() == text: 6141 self._advance() 6142 else: 6143 self._retreat(index) 6144 return None 6145 6146 if not advance: 6147 self._retreat(index) 6148 6149 return True 6150 6151 def _replace_lambda( 6152 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6153 ) -> t.Optional[exp.Expression]: 6154 if not node: 6155 return node 6156 6157 for column in node.find_all(exp.Column): 6158 if column.parts[0].name in lambda_variables: 6159 dot_or_id = column.to_dot() if column.table else column.this 6160 parent = column.parent 6161 6162 while isinstance(parent, exp.Dot): 6163 if not isinstance(parent.parent, exp.Dot): 6164 parent.replace(dot_or_id) 6165 break 6166 parent = parent.parent 6167 else: 6168 if column is node: 6169 node = dot_or_id 6170 else: 6171 column.replace(dot_or_id) 6172 return node 6173 6174 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6175 start = self._prev 6176 6177 # Not to be confused with TRUNCATE(number, decimals) function call 6178 if self._match(TokenType.L_PAREN): 6179 self._retreat(self._index - 2) 6180 return self._parse_function() 6181 6182 # Clickhouse supports TRUNCATE DATABASE as well 6183 is_database = self._match(TokenType.DATABASE) 6184 6185 self._match(TokenType.TABLE) 6186 6187 exists = self._parse_exists(not_=False) 6188 6189 expressions = self._parse_csv( 6190 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6191 ) 6192 6193 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6194 6195 if self._match_text_seq("RESTART", "IDENTITY"): 6196 identity = "RESTART" 6197 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6198 identity = "CONTINUE" 6199 else: 6200 identity = None 6201 6202 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6203 option = self._prev.text 6204 else: 6205 option = None 6206 6207 partition = self._parse_partition() 6208 6209 # Fallback case 6210 if 
self._curr: 6211 return self._parse_as_command(start) 6212 6213 return self.expression( 6214 exp.TruncateTable, 6215 expressions=expressions, 6216 is_database=is_database, 6217 exists=exists, 6218 cluster=cluster, 6219 identity=identity, 6220 option=option, 6221 partition=partition, 6222 ) 6223 6224 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6225 this = self._parse_ordered(self._parse_opclass) 6226 6227 if not self._match(TokenType.WITH): 6228 return this 6229 6230 op = self._parse_var(any_token=True) 6231 6232 return self.expression(exp.WithOperator, this=this, op=op)
The Parser consumes the list of tokens produced by the Tokenizer and builds a parsed syntax tree, one tree per SQL statement.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1128 def __init__( 1129 self, 1130 error_level: t.Optional[ErrorLevel] = None, 1131 error_message_context: int = 100, 1132 max_errors: int = 3, 1133 dialect: DialectType = None, 1134 ): 1135 from sqlglot.dialects import Dialect 1136 1137 self.error_level = error_level or ErrorLevel.IMMEDIATE 1138 self.error_message_context = error_message_context 1139 self.max_errors = max_errors 1140 self.dialect = Dialect.get_or_raise(dialect) 1141 self.reset()
1153 def parse( 1154 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1155 ) -> t.List[t.Optional[exp.Expression]]: 1156 """ 1157 Parses a list of tokens and returns a list of syntax trees, one tree 1158 per parsed SQL statement. 1159 1160 Args: 1161 raw_tokens: The list of tokens. 1162 sql: The original SQL string, used to produce helpful debug messages. 1163 1164 Returns: 1165 The list of the produced syntax trees. 1166 """ 1167 return self._parse( 1168 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1169 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1171 def parse_into( 1172 self, 1173 expression_types: exp.IntoType, 1174 raw_tokens: t.List[Token], 1175 sql: t.Optional[str] = None, 1176 ) -> t.List[t.Optional[exp.Expression]]: 1177 """ 1178 Parses a list of tokens into a given Expression type. If a collection of Expression 1179 types is given instead, this method will try to parse the token list into each one 1180 of them, stopping at the first for which the parsing succeeds. 1181 1182 Args: 1183 expression_types: The expression type(s) to try and parse the token list into. 1184 raw_tokens: The list of tokens. 1185 sql: The original SQL string, used to produce helpful debug messages. 1186 1187 Returns: 1188 The target Expression. 1189 """ 1190 errors = [] 1191 for expression_type in ensure_list(expression_types): 1192 parser = self.EXPRESSION_PARSERS.get(expression_type) 1193 if not parser: 1194 raise TypeError(f"No parser registered for {expression_type}") 1195 1196 try: 1197 return self._parse(parser, raw_tokens, sql) 1198 except ParseError as e: 1199 e.errors[0]["into_expression"] = expression_type 1200 errors.append(e) 1201 1202 raise ParseError( 1203 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1204 errors=merge_errors(errors), 1205 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1242 def check_errors(self) -> None: 1243 """Logs or raises any found errors, depending on the chosen error level setting.""" 1244 if self.error_level == ErrorLevel.WARN: 1245 for error in self.errors: 1246 logger.error(str(error)) 1247 elif self.error_level == ErrorLevel.RAISE and self.errors: 1248 raise ParseError( 1249 concat_messages(self.errors, self.max_errors), 1250 errors=merge_errors(self.errors), 1251 )
Logs or raises any found errors, depending on the chosen error level setting.
1253 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1254 """ 1255 Appends an error in the list of recorded errors or raises it, depending on the chosen 1256 error level setting. 1257 """ 1258 token = token or self._curr or self._prev or Token.string("") 1259 start = token.start 1260 end = token.end + 1 1261 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1262 highlight = self.sql[start:end] 1263 end_context = self.sql[end : end + self.error_message_context] 1264 1265 error = ParseError.new( 1266 f"{message}. Line {token.line}, Col: {token.col}.\n" 1267 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1268 description=message, 1269 line=token.line, 1270 col=token.col, 1271 start_context=start_context, 1272 highlight=highlight, 1273 end_context=end_context, 1274 ) 1275 1276 if self.error_level == ErrorLevel.IMMEDIATE: 1277 raise error 1278 1279 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1281 def expression( 1282 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1283 ) -> E: 1284 """ 1285 Creates a new, validated Expression. 1286 1287 Args: 1288 exp_class: The expression class to instantiate. 1289 comments: An optional list of comments to attach to the expression. 1290 kwargs: The arguments to set for the expression along with their respective values. 1291 1292 Returns: 1293 The target expression. 1294 """ 1295 instance = exp_class(**kwargs) 1296 instance.add_comments(comments) if comments else self._add_comments(instance) 1297 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1304 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1305 """ 1306 Validates an Expression, making sure that all its mandatory arguments are set. 1307 1308 Args: 1309 expression: The expression to validate. 1310 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1311 1312 Returns: 1313 The validated expression. 1314 """ 1315 if self.error_level != ErrorLevel.IGNORE: 1316 for error_message in expression.error_messages(args): 1317 self.raise_error(error_message) 1318 1319 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.