sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
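# Usage sketch (illustrative, not part of the module): the builders above are
# looked up by name from Parser.FUNCTIONS while function calls are parsed, e.g.
#
#   import sqlglot
#   sqlglot.parse_one("LOG(2, 64)")  # resolved through build_logarithm
#
# Whether LOG(2, 64) reads the 2 as the base or the argument depends on the
# dialect's LOG_BASE_FIRST setting, as implemented in build_logarithm.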
class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
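    # Usage sketch (illustrative): the error level decides whether problems
    # raise immediately, are collected and raised together, or are only logged:
    #
    #   import sqlglot
    #   from sqlglot.errors import ErrorLevel
    #   sqlglot.parse_one("SELECT 1", error_level=ErrorLevel.RAISE)
    #
    # The keyword is forwarded (via the dialect) to this class's __init__ by the
    # top-level sqlglot.parse/parse_one helpers.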
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "MOD": lambda args: exp.Mod(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": build_var_map,
    }
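    # Sketch (hypothetical subclass): dialects customize function parsing by
    # overriding this table in their own Parser subclass, e.g.
    #
    #   class MyParser(Parser):
    #       FUNCTIONS = {
    #           **Parser.FUNCTIONS,
    #           "FOO": lambda args: exp.Anonymous(this="FOO", expressions=args),
    #       }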
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.MODEL,
        TokenType.DICTIONARY,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.FOREIGN_KEY,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}
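    # Sketch (illustrative): because these words are in ID_VAR_TOKENS, they can
    # still be parsed as plain identifiers where the grammar allows it, e.g.
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT 1 AS format")  # FORMAT is a keyword, yet a valid alias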
    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.COLON_EQ: exp.PropertyEQ,
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
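    # Sketch (illustrative): COLUMN_OPERATORS drives postfix column syntax such
    # as casts and JSON extraction, e.g.
    #
    #   import sqlglot
    #   sqlglot.parse_one("a::INT")                     # -> exp.Cast
    #   sqlglot.parse_one("j -> 'k'", read="postgres")  # -> exp.JSONExtract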
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
    }
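    # Sketch (illustrative): EXPRESSION_PARSERS backs parse_into, which targets
    # a specific node type instead of a full statement, e.g.
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   sqlglot.parse_one("db.tbl", into=exp.Table)  # parsed via _parse_table_parts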
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
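    # Sketch (illustrative): PLACEHOLDER_PARSERS recognizes bind parameters, e.g.
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT * FROM t WHERE id = :id", read="oracle")
    #
    # where the COLON form above yields an exp.Placeholder(this="id").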
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
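    # Sketch (illustrative): PROPERTY_PARSERS handles DDL properties, e.g.
    #
    #   import sqlglot
    #   sqlglot.parse_one(
    #       "CREATE TABLE t (x Int8) ENGINE=MergeTree", read="clickhouse"
    #   )  # the ENGINE clause becomes an exp.EngineProperty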
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_conjunction),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }
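    # Sketch (illustrative): CONSTRAINT_PARSERS covers column constraints, e.g.
    #
    #   import sqlglot
    #   sqlglot.parse_one("CREATE TABLE t (x INT NOT NULL DEFAULT 0)")
    #
    # where NOT NULL and DEFAULT are handled by the "NOT" and "DEFAULT" entries.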
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }
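    # Sketch (illustrative): FUNCTION_PARSERS handles functions whose argument
    # syntax is not a plain comma-separated list, e.g.
    #
    #   import sqlglot
    #   sqlglot.parse_one("CAST(x AS INT)")      # -> exp.Cast
    #   sqlglot.parse_one("TRY_CAST(x AS INT)")  # -> exp.TryCast (safe cast)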
    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True
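    # Sketch (hypothetical subclass): dialect parsers flip these flags to change
    # behavior without re-implementing parse logic, e.g.
    #
    #   class MyParser(Parser):
    #       LOG_DEFAULTS_TO_LN = True  # single-argument LOG(x) parses as LN(x)
    #       STRING_ALIASES = True      # allow SELECT COUNT(*) 'count'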
    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
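    # Usage sketch (illustrative): driving the parser directly; the top-level
    # sqlglot.parse/parse_one helpers wrap this flow.
    #
    #   from sqlglot.tokens import Tokenizer
    #   sql = "SELECT 1 AS x"
    #   trees = Parser().parse(Tokenizer().tokenize(sql), sql)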
1177 """ 1178 errors = [] 1179 for expression_type in ensure_list(expression_types): 1180 parser = self.EXPRESSION_PARSERS.get(expression_type) 1181 if not parser: 1182 raise TypeError(f"No parser registered for {expression_type}") 1183 1184 try: 1185 return self._parse(parser, raw_tokens, sql) 1186 except ParseError as e: 1187 e.errors[0]["into_expression"] = expression_type 1188 errors.append(e) 1189 1190 raise ParseError( 1191 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1192 errors=merge_errors(errors), 1193 ) from errors[-1] 1194 1195 def _parse( 1196 self, 1197 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1198 raw_tokens: t.List[Token], 1199 sql: t.Optional[str] = None, 1200 ) -> t.List[t.Optional[exp.Expression]]: 1201 self.reset() 1202 self.sql = sql or "" 1203 1204 total = len(raw_tokens) 1205 chunks: t.List[t.List[Token]] = [[]] 1206 1207 for i, token in enumerate(raw_tokens): 1208 if token.token_type == TokenType.SEMICOLON: 1209 if i < total - 1: 1210 chunks.append([]) 1211 else: 1212 chunks[-1].append(token) 1213 1214 expressions = [] 1215 1216 for tokens in chunks: 1217 self._index = -1 1218 self._tokens = tokens 1219 self._advance() 1220 1221 expressions.append(parse_method(self)) 1222 1223 if self._index < len(self._tokens): 1224 self.raise_error("Invalid expression / Unexpected token") 1225 1226 self.check_errors() 1227 1228 return expressions 1229 1230 def check_errors(self) -> None: 1231 """Logs or raises any found errors, depending on the chosen error level setting.""" 1232 if self.error_level == ErrorLevel.WARN: 1233 for error in self.errors: 1234 logger.error(str(error)) 1235 elif self.error_level == ErrorLevel.RAISE and self.errors: 1236 raise ParseError( 1237 concat_messages(self.errors, self.max_errors), 1238 errors=merge_errors(self.errors), 1239 ) 1240 1241 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1242 """ 1243 Appends an error in the list of recorded errors or raises it, depending on the chosen 1244 error level setting. 1245 """ 1246 token = token or self._curr or self._prev or Token.string("") 1247 start = token.start 1248 end = token.end + 1 1249 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1250 highlight = self.sql[start:end] 1251 end_context = self.sql[end : end + self.error_message_context] 1252 1253 error = ParseError.new( 1254 f"{message}. Line {token.line}, Col: {token.col}.\n" 1255 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1256 description=message, 1257 line=token.line, 1258 col=token.col, 1259 start_context=start_context, 1260 highlight=highlight, 1261 end_context=end_context, 1262 ) 1263 1264 if self.error_level == ErrorLevel.IMMEDIATE: 1265 raise error 1266 1267 self.errors.append(error) 1268 1269 def expression( 1270 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1271 ) -> E: 1272 """ 1273 Creates a new, validated Expression. 1274 1275 Args: 1276 exp_class: The expression class to instantiate. 1277 comments: An optional list of comments to attach to the expression. 1278 kwargs: The arguments to set for the expression along with their respective values. 1279 1280 Returns: 1281 The target expression. 
1282 """ 1283 instance = exp_class(**kwargs) 1284 instance.add_comments(comments) if comments else self._add_comments(instance) 1285 return self.validate_expression(instance) 1286 1287 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1288 if expression and self._prev_comments: 1289 expression.add_comments(self._prev_comments) 1290 self._prev_comments = None 1291 1292 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1293 """ 1294 Validates an Expression, making sure that all its mandatory arguments are set. 1295 1296 Args: 1297 expression: The expression to validate. 1298 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1299 1300 Returns: 1301 The validated expression. 1302 """ 1303 if self.error_level != ErrorLevel.IGNORE: 1304 for error_message in expression.error_messages(args): 1305 self.raise_error(error_message) 1306 1307 return expression 1308 1309 def _find_sql(self, start: Token, end: Token) -> str: 1310 return self.sql[start.start : end.end + 1] 1311 1312 def _is_connected(self) -> bool: 1313 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1314 1315 def _advance(self, times: int = 1) -> None: 1316 self._index += times 1317 self._curr = seq_get(self._tokens, self._index) 1318 self._next = seq_get(self._tokens, self._index + 1) 1319 1320 if self._index > 0: 1321 self._prev = self._tokens[self._index - 1] 1322 self._prev_comments = self._prev.comments 1323 else: 1324 self._prev = None 1325 self._prev_comments = None 1326 1327 def _retreat(self, index: int) -> None: 1328 if index != self._index: 1329 self._advance(index - self._index) 1330 1331 def _warn_unsupported(self) -> None: 1332 if len(self._tokens) <= 1: 1333 return 1334 1335 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1336 # interested in emitting a warning for the one being currently processed. 1337 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1338 1339 logger.warning( 1340 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 
    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
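    # Sketch (illustrative):
    #
    #   import sqlglot
    #   sqlglot.parse_one("DROP TABLE IF EXISTS t")
    #
    # parses into exp.Drop(kind="TABLE", exists=True) via the helpers above.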
    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )
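    # Sketch (illustrative):
    #
    #   import sqlglot
    #   sqlglot.parse_one("CREATE OR REPLACE VIEW v AS SELECT 1")
    #
    # yields exp.Create(kind="VIEW", replace=True) with the SELECT attached as
    # its "expression" argument.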
"default": self._match_text_seq("DEFAULT"), 1660 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1661 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1662 "after": self._match_text_seq("AFTER"), 1663 "minimum": self._match_texts(("MIN", "MINIMUM")), 1664 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1665 } 1666 1667 if self._match_texts(self.PROPERTY_PARSERS): 1668 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1669 try: 1670 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1671 except TypeError: 1672 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1673 1674 return None 1675 1676 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1677 return self._parse_wrapped_csv(self._parse_property) 1678 1679 def _parse_property(self) -> t.Optional[exp.Expression]: 1680 if self._match_texts(self.PROPERTY_PARSERS): 1681 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1682 1683 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1684 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1685 1686 if self._match_text_seq("COMPOUND", "SORTKEY"): 1687 return self._parse_sortkey(compound=True) 1688 1689 if self._match_text_seq("SQL", "SECURITY"): 1690 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1691 1692 index = self._index 1693 key = self._parse_column() 1694 1695 if not self._match(TokenType.EQ): 1696 self._retreat(index) 1697 return self._parse_sequence_properties() 1698 1699 return self.expression( 1700 exp.Property, 1701 this=key.to_dot() if isinstance(key, exp.Column) else key, 1702 value=self._parse_column() or self._parse_var(any_token=True), 1703 ) 1704 1705 def _parse_stored(self) -> exp.FileFormatProperty: 1706 self._match(TokenType.ALIAS) 1707 1708 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1709 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1710 1711 return self.expression( 1712 exp.FileFormatProperty, 1713 this=( 1714 self.expression( 1715 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1716 ) 1717 if input_format or output_format 1718 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1719 ), 1720 ) 1721 1722 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1723 self._match(TokenType.EQ) 1724 self._match(TokenType.ALIAS) 1725 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1726 1727 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1728 properties = [] 1729 while True: 1730 if before: 1731 prop = self._parse_property_before() 1732 else: 1733 prop = self._parse_property() 1734 if not prop: 1735 break 1736 for p in ensure_list(prop): 1737 properties.append(p) 1738 1739 if properties: 1740 return self.expression(exp.Properties, expressions=properties) 1741 1742 return None 1743 1744 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1745 return self.expression( 1746 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1747 ) 1748 1749 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1750 if self._index >= 2: 1751 pre_volatile_token = self._tokens[self._index - 2] 1752 else: 1753 pre_volatile_token = None 1754 1755 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1756 return 
    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )
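    # Snowflake-style CREATE ... COPY GRANTS is handled below: the parser only commits to a
    # CopyGrantsProperty when GRANTS actually follows COPY, and retreats one token otherwise.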
    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None
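    # NO is ambiguous at this point: it can introduce Teradata's NO PRIMARY INDEX or the
    # routine characteristic NO SQL (as in MySQL). The helper below tries both readings.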
    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        extended = self._match_text_seq("EXTENDED")
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions
        )
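    # INSERT has several dialect-specific shapes handled below: Hive's INSERT OVERWRITE
    # [LOCAL] DIRECTORY, MySQL's INSERT IGNORE, SQLite-style INSERT OR <alternative>, and
    # inserting into a table function (INSERT INTO FUNCTION ...).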
    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = self._parse_table(schema=True) if not is_function else self._parse_function()

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )
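    # Hive/Spark ROW FORMAT comes in two flavors, both handled below: ROW FORMAT SERDE
    # '<class>' [WITH SERDEPROPERTIES (...)] and ROW FORMAT DELIMITED with its optional
    # FIELDS/ESCAPED/COLLECTION ITEMS/MAP KEYS/LINES/NULL sub-clauses.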
    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_properties()
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )
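    # Spark SQL's CACHE statement, e.g. CACHE LAZY TABLE t OPTIONS('storageLevel' = 'DISK_ONLY')
    # AS SELECT ..., is parsed below; OPTIONS is captured as a [key, value] pair.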
    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        return self.expression(exp.Tuple, expressions=[self._parse_expression()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # DuckDB supports a leading FROM clause, e.g. FROM x SELECT *
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )
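    # Dialects like BigQuery allow implicitly unnesting an array column in the FROM clause,
    # e.g. SELECT ... FROM t, t.arr_col. The rewrite below turns such references into an
    # explicit CROSS JOIN UNNEST(...) so downstream logic only sees one canonical shape.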
    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import (
            normalize_identifiers as _norm,
        )

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, (exp.Query, exp.Table)):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args:
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )
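    # MATCH_RECOGNIZE (Snowflake, Oracle) is parsed clause by clause below: PARTITION BY /
    # ORDER BY / MEASURES, the rows-per-match and AFTER MATCH SKIP options, a PATTERN whose
    # raw text is recovered by balancing parentheses, and the DEFINE list.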
    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )
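    # _parse_join_parts returns the optional (method, side, kind) tokens of a join, so e.g.
    # "NATURAL FULL OUTER JOIN" yields method=NATURAL, side=FULL, kind=OUTER; a bare comma
    # in the FROM clause is treated as a plain join in _parse_join below.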
    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_conjunction()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
        )

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )
    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
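    # A dotted name fills the Table args right to left, so "a.b.c" becomes
    # Table(catalog=a, db=b, this=c), and T-SQL's "a..b" leaves an empty db part.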
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)
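    # Temporal queries such as "FOR SYSTEM_TIME AS OF '2024-01-01'" or "FOR VERSION BETWEEN
    # 1 AND 2" land in _parse_version above and become exp.Version nodes with kind
    # "AS OF", "FROM"/"BETWEEN", "CONTAINED IN" or "ALL".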
    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match_text_seq("VALUES"):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )
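    # The IN clause of a PIVOT, e.g. FOR col IN (1 AS one, 2 AS two), is parsed below into
    # an exp.In whose expressions are wrapped in exp.PivotAlias when an alias is present.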
    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_conjunction()

            self._match(TokenType.ALIAS)
            alias = self._parse_field()
            if alias:
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        aliased_expressions = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=aliased_expressions)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_name_as_expression(self) -> exp.Alias:
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )
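    # The sort parsers below share one shape: _parse_sort is reused for clause families like
    # Hive's SORT BY / DISTRIBUTE BY / CLUSTER BY by passing in the expression class and the
    # trigger token, while _parse_ordered resolves implicit NULLS FIRST/LAST from the
    # dialect's NULL_ORDERING when the query doesn't spell it out.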
    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_conjunction()
        if not this:
            return None

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this
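    # MySQL's "LIMIT 1, 2" is normalized above into offset=1, expression=2, and ANSI
    # "FETCH FIRST n ROWS ONLY" becomes an exp.Fetch; the OFFSET clause itself is parsed below.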
expressions=self._parse_limit_by() 3631 ) 3632 3633 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3634 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3635 3636 def _parse_locks(self) -> t.List[exp.Lock]: 3637 locks = [] 3638 while True: 3639 if self._match_text_seq("FOR", "UPDATE"): 3640 update = True 3641 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3642 "LOCK", "IN", "SHARE", "MODE" 3643 ): 3644 update = False 3645 else: 3646 break 3647 3648 expressions = None 3649 if self._match_text_seq("OF"): 3650 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3651 3652 wait: t.Optional[bool | exp.Expression] = None 3653 if self._match_text_seq("NOWAIT"): 3654 wait = True 3655 elif self._match_text_seq("WAIT"): 3656 wait = self._parse_primary() 3657 elif self._match_text_seq("SKIP", "LOCKED"): 3658 wait = False 3659 3660 locks.append( 3661 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3662 ) 3663 3664 return locks 3665 3666 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3667 while this and self._match_set(self.SET_OPERATIONS): 3668 token_type = self._prev.token_type 3669 3670 if token_type == TokenType.UNION: 3671 operation = exp.Union 3672 elif token_type == TokenType.EXCEPT: 3673 operation = exp.Except 3674 else: 3675 operation = exp.Intersect 3676 3677 comments = self._prev.comments 3678 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3679 by_name = self._match_text_seq("BY", "NAME") 3680 expression = self._parse_select(nested=True, parse_set_operation=False) 3681 3682 this = self.expression( 3683 operation, 3684 comments=comments, 3685 this=this, 3686 distinct=distinct, 3687 by_name=by_name, 3688 expression=expression, 3689 ) 3690 3691 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3692 expression = this.expression 3693 3694 if expression: 3695 for arg in self.UNION_MODIFIERS: 3696 expr = expression.args.get(arg) 3697 if expr: 3698 this.set(arg, expr.pop()) 3699 3700 return this 3701 3702 def _parse_expression(self) -> t.Optional[exp.Expression]: 3703 return self._parse_alias(self._parse_conjunction()) 3704 3705 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3706 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3707 3708 def _parse_equality(self) -> t.Optional[exp.Expression]: 3709 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3710 3711 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3712 return self._parse_tokens(self._parse_range, self.COMPARISON) 3713 3714 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3715 this = this or self._parse_bitwise() 3716 negate = self._match(TokenType.NOT) 3717 3718 if self._match_set(self.RANGE_PARSERS): 3719 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3720 if not expression: 3721 return this 3722 3723 this = expression 3724 elif self._match(TokenType.ISNULL): 3725 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3726 3727 # Postgres supports ISNULL and NOTNULL for conditions. 
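        # (Added illustration: both postfix forms are normalized here, so a
        # transpile such as sqlglot.transpile("SELECT x NOTNULL", read="postgres")
        # yields roughly 'SELECT NOT x IS NULL', and downstream dialects never
        # see the Postgres shorthand.)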
3728 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3729 if self._match(TokenType.NOTNULL): 3730 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3731 this = self.expression(exp.Not, this=this) 3732 3733 if negate: 3734 this = self.expression(exp.Not, this=this) 3735 3736 if self._match(TokenType.IS): 3737 this = self._parse_is(this) 3738 3739 return this 3740 3741 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3742 index = self._index - 1 3743 negate = self._match(TokenType.NOT) 3744 3745 if self._match_text_seq("DISTINCT", "FROM"): 3746 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3747 return self.expression(klass, this=this, expression=self._parse_bitwise()) 3748 3749 expression = self._parse_null() or self._parse_boolean() 3750 if not expression: 3751 self._retreat(index) 3752 return None 3753 3754 this = self.expression(exp.Is, this=this, expression=expression) 3755 return self.expression(exp.Not, this=this) if negate else this 3756 3757 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3758 unnest = self._parse_unnest(with_alias=False) 3759 if unnest: 3760 this = self.expression(exp.In, this=this, unnest=unnest) 3761 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3762 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3763 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3764 3765 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 3766 this = self.expression(exp.In, this=this, query=expressions[0]) 3767 else: 3768 this = self.expression(exp.In, this=this, expressions=expressions) 3769 3770 if matched_l_paren: 3771 self._match_r_paren(this) 3772 elif not self._match(TokenType.R_BRACKET, expression=this): 3773 self.raise_error("Expecting ]") 3774 else: 3775 this = self.expression(exp.In, this=this, field=self._parse_field()) 3776 3777 return this 3778 3779 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3780 low = self._parse_bitwise() 3781 self._match(TokenType.AND) 3782 high = self._parse_bitwise() 3783 return self.expression(exp.Between, this=this, low=low, high=high) 3784 3785 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3786 if not self._match(TokenType.ESCAPE): 3787 return this 3788 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3789 3790 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3791 index = self._index 3792 3793 if not self._match(TokenType.INTERVAL) and match_interval: 3794 return None 3795 3796 if self._match(TokenType.STRING, advance=False): 3797 this = self._parse_primary() 3798 else: 3799 this = self._parse_term() 3800 3801 if not this or ( 3802 isinstance(this, exp.Column) 3803 and not this.table 3804 and not this.this.quoted 3805 and this.name.upper() == "IS" 3806 ): 3807 self._retreat(index) 3808 return None 3809 3810 unit = self._parse_function() or ( 3811 not self._match(TokenType.ALIAS, advance=False) 3812 and self._parse_var(any_token=True, upper=True) 3813 ) 3814 3815 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3816 # each INTERVAL expression into this canonical form so it's easy to transpile 3817 if this and this.is_number: 3818 this = exp.Literal.string(this.name) 3819 elif this and this.is_string: 3820 parts = this.name.split() 3821 3822 if len(parts) == 2: 3823 if unit: 3824 # This is not 
actually a unit, it's something else (e.g. a "window side") 3825 unit = None 3826 self._retreat(self._index - 1) 3827 3828 this = exp.Literal.string(parts[0]) 3829 unit = self.expression(exp.Var, this=parts[1].upper()) 3830 3831 return self.expression(exp.Interval, this=this, unit=unit) 3832 3833 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3834 this = self._parse_term() 3835 3836 while True: 3837 if self._match_set(self.BITWISE): 3838 this = self.expression( 3839 self.BITWISE[self._prev.token_type], 3840 this=this, 3841 expression=self._parse_term(), 3842 ) 3843 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3844 this = self.expression( 3845 exp.DPipe, 3846 this=this, 3847 expression=self._parse_term(), 3848 safe=not self.dialect.STRICT_STRING_CONCAT, 3849 ) 3850 elif self._match(TokenType.DQMARK): 3851 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3852 elif self._match_pair(TokenType.LT, TokenType.LT): 3853 this = self.expression( 3854 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3855 ) 3856 elif self._match_pair(TokenType.GT, TokenType.GT): 3857 this = self.expression( 3858 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3859 ) 3860 else: 3861 break 3862 3863 return this 3864 3865 def _parse_term(self) -> t.Optional[exp.Expression]: 3866 return self._parse_tokens(self._parse_factor, self.TERM) 3867 3868 def _parse_factor(self) -> t.Optional[exp.Expression]: 3869 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3870 this = parse_method() 3871 3872 while self._match_set(self.FACTOR): 3873 this = self.expression( 3874 self.FACTOR[self._prev.token_type], 3875 this=this, 3876 comments=self._prev_comments, 3877 expression=parse_method(), 3878 ) 3879 if isinstance(this, exp.Div): 3880 this.args["typed"] = self.dialect.TYPED_DIVISION 3881 this.args["safe"] = self.dialect.SAFE_DIVISION 3882 3883 return this 3884 3885 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3886 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3887 3888 def _parse_unary(self) -> t.Optional[exp.Expression]: 3889 if self._match_set(self.UNARY_PARSERS): 3890 return self.UNARY_PARSERS[self._prev.token_type](self) 3891 return self._parse_at_time_zone(self._parse_type()) 3892 3893 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3894 interval = parse_interval and self._parse_interval() 3895 if interval: 3896 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 3897 while True: 3898 index = self._index 3899 self._match(TokenType.PLUS) 3900 3901 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 3902 self._retreat(index) 3903 break 3904 3905 interval = self.expression( # type: ignore 3906 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 3907 ) 3908 3909 return interval 3910 3911 index = self._index 3912 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3913 this = self._parse_column() 3914 3915 if data_type: 3916 if isinstance(this, exp.Literal): 3917 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3918 if parser: 3919 return parser(self, this, data_type) 3920 return self.expression(exp.Cast, this=this, to=data_type) 3921 if not data_type.expressions: 3922 self._retreat(index) 3923 return self._parse_column() 3924 return self._parse_column_ops(data_type) 3925 3926 return this and self._parse_column_ops(this) 3927 3928 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3929 this = self._parse_type() 3930 if not this: 3931 return None 3932 3933 if isinstance(this, exp.Column) and not this.table: 3934 this = exp.var(this.name.upper()) 3935 3936 return self.expression( 3937 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3938 ) 3939 3940 def _parse_types( 3941 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3942 ) -> t.Optional[exp.Expression]: 3943 index = self._index 3944 3945 prefix = self._match_text_seq("SYSUDTLIB", ".") 3946 3947 if not self._match_set(self.TYPE_TOKENS): 3948 identifier = allow_identifiers and self._parse_id_var( 3949 any_token=False, tokens=(TokenType.VAR,) 3950 ) 3951 if identifier: 3952 tokens = self.dialect.tokenize(identifier.name) 3953 3954 if len(tokens) != 1: 3955 self.raise_error("Unexpected identifier", self._prev) 3956 3957 if tokens[0].token_type in self.TYPE_TOKENS: 3958 self._prev = tokens[0] 3959 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 3960 type_name = identifier.name 3961 3962 while self._match(TokenType.DOT): 3963 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3964 3965 return exp.DataType.build(type_name, udt=True) 3966 else: 3967 self._retreat(self._index - 1) 3968 return None 3969 else: 3970 return None 3971 3972 type_token = self._prev.token_type 3973 3974 if type_token == TokenType.PSEUDO_TYPE: 3975 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 3976 3977 if type_token == TokenType.OBJECT_IDENTIFIER: 3978 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 3979 3980 nested = type_token in self.NESTED_TYPE_TOKENS 3981 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3982 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 3983 expressions = None 3984 maybe_func = False 3985 3986 if self._match(TokenType.L_PAREN): 3987 if is_struct: 3988 expressions = self._parse_csv(self._parse_struct_types) 3989 elif nested: 3990 expressions = self._parse_csv( 3991 lambda: self._parse_types( 3992 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3993 ) 3994 ) 3995 elif type_token in self.ENUM_TYPE_TOKENS: 3996 expressions = self._parse_csv(self._parse_equality) 3997 elif is_aggregate: 3998 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 3999 any_token=False, tokens=(TokenType.VAR,) 4000 ) 4001 if not func_or_ident or not self._match(TokenType.COMMA): 4002 return None 4003 expressions = 
self._parse_csv( 4004 lambda: self._parse_types( 4005 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4006 ) 4007 ) 4008 expressions.insert(0, func_or_ident) 4009 else: 4010 expressions = self._parse_csv(self._parse_type_size) 4011 4012 if not expressions or not self._match(TokenType.R_PAREN): 4013 self._retreat(index) 4014 return None 4015 4016 maybe_func = True 4017 4018 this: t.Optional[exp.Expression] = None 4019 values: t.Optional[t.List[exp.Expression]] = None 4020 4021 if nested and self._match(TokenType.LT): 4022 if is_struct: 4023 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4024 else: 4025 expressions = self._parse_csv( 4026 lambda: self._parse_types( 4027 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4028 ) 4029 ) 4030 4031 if not self._match(TokenType.GT): 4032 self.raise_error("Expecting >") 4033 4034 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4035 values = self._parse_csv(self._parse_conjunction) 4036 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4037 4038 if type_token in self.TIMESTAMPS: 4039 if self._match_text_seq("WITH", "TIME", "ZONE"): 4040 maybe_func = False 4041 tz_type = ( 4042 exp.DataType.Type.TIMETZ 4043 if type_token in self.TIMES 4044 else exp.DataType.Type.TIMESTAMPTZ 4045 ) 4046 this = exp.DataType(this=tz_type, expressions=expressions) 4047 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4048 maybe_func = False 4049 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4050 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4051 maybe_func = False 4052 elif type_token == TokenType.INTERVAL: 4053 unit = self._parse_var() 4054 4055 if self._match_text_seq("TO"): 4056 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 4057 else: 4058 span = None 4059 4060 if span or not unit: 4061 this = self.expression( 4062 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 4063 ) 4064 else: 4065 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4066 4067 if maybe_func and check_func: 4068 index2 = self._index 4069 peek = self._parse_string() 4070 4071 if not peek: 4072 self._retreat(index) 4073 return None 4074 4075 self._retreat(index2) 4076 4077 if not this: 4078 if self._match_text_seq("UNSIGNED"): 4079 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4080 if not unsigned_type_token: 4081 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4082 4083 type_token = unsigned_type_token or type_token 4084 4085 this = exp.DataType( 4086 this=exp.DataType.Type[type_token.value], 4087 expressions=expressions, 4088 nested=nested, 4089 values=values, 4090 prefix=prefix, 4091 ) 4092 4093 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 4094 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 4095 4096 return this 4097 4098 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4099 index = self._index 4100 this = self._parse_type(parse_interval=False) or self._parse_id_var() 4101 self._match(TokenType.COLON) 4102 column_def = self._parse_column_def(this) 4103 4104 if type_required and ( 4105 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 4106 ): 4107 self._retreat(index) 4108 return self._parse_types() 4109 4110 return column_def 4111 4112 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) 
-> t.Optional[exp.Expression]: 4113 if not self._match_text_seq("AT", "TIME", "ZONE"): 4114 return this 4115 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4116 4117 def _parse_column(self) -> t.Optional[exp.Expression]: 4118 this = self._parse_column_reference() 4119 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4120 4121 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4122 this = self._parse_field() 4123 if ( 4124 not this 4125 and self._match(TokenType.VALUES, advance=False) 4126 and self.VALUES_FOLLOWED_BY_PAREN 4127 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4128 ): 4129 this = self._parse_id_var() 4130 4131 return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this 4132 4133 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4134 this = self._parse_bracket(this) 4135 4136 while self._match_set(self.COLUMN_OPERATORS): 4137 op_token = self._prev.token_type 4138 op = self.COLUMN_OPERATORS.get(op_token) 4139 4140 if op_token == TokenType.DCOLON: 4141 field = self._parse_types() 4142 if not field: 4143 self.raise_error("Expected type") 4144 elif op and self._curr: 4145 field = self._parse_column_reference() 4146 else: 4147 field = self._parse_field(anonymous_func=True, any_token=True) 4148 4149 if isinstance(field, exp.Func) and this: 4150 # bigquery allows function calls like x.y.count(...) 4151 # SAFE.SUBSTR(...) 4152 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4153 this = exp.replace_tree( 4154 this, 4155 lambda n: ( 4156 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4157 if n.table 4158 else n.this 4159 ) 4160 if isinstance(n, exp.Column) 4161 else n, 4162 ) 4163 4164 if op: 4165 this = op(self, this, field) 4166 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4167 this = self.expression( 4168 exp.Column, 4169 this=field, 4170 table=this.this, 4171 db=this.args.get("table"), 4172 catalog=this.args.get("db"), 4173 ) 4174 else: 4175 this = self.expression(exp.Dot, this=this, expression=field) 4176 this = self._parse_bracket(this) 4177 return this 4178 4179 def _parse_primary(self) -> t.Optional[exp.Expression]: 4180 if self._match_set(self.PRIMARY_PARSERS): 4181 token_type = self._prev.token_type 4182 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4183 4184 if token_type == TokenType.STRING: 4185 expressions = [primary] 4186 while self._match(TokenType.STRING): 4187 expressions.append(exp.Literal.string(self._prev.text)) 4188 4189 if len(expressions) > 1: 4190 return self.expression(exp.Concat, expressions=expressions) 4191 4192 return primary 4193 4194 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4195 return exp.Literal.number(f"0.{self._prev.text}") 4196 4197 if self._match(TokenType.L_PAREN): 4198 comments = self._prev_comments 4199 query = self._parse_select() 4200 4201 if query: 4202 expressions = [query] 4203 else: 4204 expressions = self._parse_expressions() 4205 4206 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4207 4208 if isinstance(this, exp.UNWRAPPED_QUERIES): 4209 this = self._parse_set_operations( 4210 self._parse_subquery(this=this, parse_alias=False) 4211 ) 4212 elif isinstance(this, exp.Subquery): 4213 this = self._parse_subquery( 4214 this=self._parse_set_operations(this), parse_alias=False 4215 ) 4216 elif len(expressions) > 1: 4217 this = 
self.expression(exp.Tuple, expressions=expressions) 4218 else: 4219 this = self.expression(exp.Paren, this=this) 4220 4221 if this: 4222 this.add_comments(comments) 4223 4224 self._match_r_paren(expression=this) 4225 return this 4226 4227 return None 4228 4229 def _parse_field( 4230 self, 4231 any_token: bool = False, 4232 tokens: t.Optional[t.Collection[TokenType]] = None, 4233 anonymous_func: bool = False, 4234 ) -> t.Optional[exp.Expression]: 4235 return ( 4236 self._parse_primary() 4237 or self._parse_function(anonymous=anonymous_func) 4238 or self._parse_id_var(any_token=any_token, tokens=tokens) 4239 ) 4240 4241 def _parse_function( 4242 self, 4243 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4244 anonymous: bool = False, 4245 optional_parens: bool = True, 4246 ) -> t.Optional[exp.Expression]: 4247 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4248 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4249 fn_syntax = False 4250 if ( 4251 self._match(TokenType.L_BRACE, advance=False) 4252 and self._next 4253 and self._next.text.upper() == "FN" 4254 ): 4255 self._advance(2) 4256 fn_syntax = True 4257 4258 func = self._parse_function_call( 4259 functions=functions, anonymous=anonymous, optional_parens=optional_parens 4260 ) 4261 4262 if fn_syntax: 4263 self._match(TokenType.R_BRACE) 4264 4265 return func 4266 4267 def _parse_function_call( 4268 self, 4269 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4270 anonymous: bool = False, 4271 optional_parens: bool = True, 4272 ) -> t.Optional[exp.Expression]: 4273 if not self._curr: 4274 return None 4275 4276 comments = self._curr.comments 4277 token_type = self._curr.token_type 4278 this = self._curr.text 4279 upper = this.upper() 4280 4281 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4282 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4283 self._advance() 4284 return self._parse_window(parser(self)) 4285 4286 if not self._next or self._next.token_type != TokenType.L_PAREN: 4287 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4288 self._advance() 4289 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4290 4291 return None 4292 4293 if token_type not in self.FUNC_TOKENS: 4294 return None 4295 4296 self._advance(2) 4297 4298 parser = self.FUNCTION_PARSERS.get(upper) 4299 if parser and not anonymous: 4300 this = parser(self) 4301 else: 4302 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4303 4304 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4305 this = self.expression(subquery_predicate, this=self._parse_select()) 4306 self._match_r_paren() 4307 return this 4308 4309 if functions is None: 4310 functions = self.FUNCTIONS 4311 4312 function = functions.get(upper) 4313 4314 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4315 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4316 4317 if alias: 4318 args = self._kv_to_prop_eq(args) 4319 4320 if function and not anonymous: 4321 if "dialect" in function.__code__.co_varnames: 4322 func = function(args, dialect=self.dialect) 4323 else: 4324 func = function(args) 4325 4326 func = self.validate_expression(func, args) 4327 if not self.dialect.NORMALIZE_FUNCTIONS: 4328 func.meta["name"] = this 4329 4330 this = func 4331 else: 4332 if token_type == TokenType.IDENTIFIER: 4333 this = exp.Identifier(this=this, quoted=True) 4334 this = self.expression(exp.Anonymous, this=this, expressions=args) 4335 
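        # Added note (not in the original source): at this point `this` is the
        # typed node built by a FUNCTIONS / FUNCTION_PARSERS entry, or an
        # exp.Anonymous fallback when the name is unknown, e.g.:
        #
        #   >>> import sqlglot
        #   >>> type(sqlglot.parse_one("SELECT COALESCE(a, b)").selects[0]).__name__
        #   'Coalesce'
        #   >>> type(sqlglot.parse_one("SELECT MY_UDF(a, b)").selects[0]).__name__
        #   'Anonymous'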
4336 if isinstance(this, exp.Expression): 4337 this.add_comments(comments) 4338 4339 self._match_r_paren(this) 4340 return self._parse_window(this) 4341 4342 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4343 transformed = [] 4344 4345 for e in expressions: 4346 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4347 if isinstance(e, exp.Alias): 4348 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4349 4350 if not isinstance(e, exp.PropertyEQ): 4351 e = self.expression( 4352 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4353 ) 4354 4355 if isinstance(e.this, exp.Column): 4356 e.this.replace(e.this.this) 4357 4358 transformed.append(e) 4359 4360 return transformed 4361 4362 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4363 return self._parse_column_def(self._parse_id_var()) 4364 4365 def _parse_user_defined_function( 4366 self, kind: t.Optional[TokenType] = None 4367 ) -> t.Optional[exp.Expression]: 4368 this = self._parse_id_var() 4369 4370 while self._match(TokenType.DOT): 4371 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4372 4373 if not self._match(TokenType.L_PAREN): 4374 return this 4375 4376 expressions = self._parse_csv(self._parse_function_parameter) 4377 self._match_r_paren() 4378 return self.expression( 4379 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4380 ) 4381 4382 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4383 literal = self._parse_primary() 4384 if literal: 4385 return self.expression(exp.Introducer, this=token.text, expression=literal) 4386 4387 return self.expression(exp.Identifier, this=token.text) 4388 4389 def _parse_session_parameter(self) -> exp.SessionParameter: 4390 kind = None 4391 this = self._parse_id_var() or self._parse_primary() 4392 4393 if this and self._match(TokenType.DOT): 4394 kind = this.name 4395 this = self._parse_var() or self._parse_primary() 4396 4397 return self.expression(exp.SessionParameter, this=this, kind=kind) 4398 4399 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4400 index = self._index 4401 4402 if self._match(TokenType.L_PAREN): 4403 expressions = t.cast( 4404 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4405 ) 4406 4407 if not self._match(TokenType.R_PAREN): 4408 self._retreat(index) 4409 else: 4410 expressions = [self._parse_id_var()] 4411 4412 if self._match_set(self.LAMBDAS): 4413 return self.LAMBDAS[self._prev.token_type](self, expressions) 4414 4415 self._retreat(index) 4416 4417 this: t.Optional[exp.Expression] 4418 4419 if self._match(TokenType.DISTINCT): 4420 this = self.expression( 4421 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4422 ) 4423 else: 4424 this = self._parse_select_or_expression(alias=alias) 4425 4426 return self._parse_limit( 4427 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4428 ) 4429 4430 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4431 index = self._index 4432 4433 if not self.errors: 4434 try: 4435 if self._parse_select(nested=True): 4436 return this 4437 except ParseError: 4438 pass 4439 finally: 4440 self.errors.clear() 4441 self._retreat(index) 4442 4443 if not self._match(TokenType.L_PAREN): 4444 return this 4445 4446 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4447 4448 
self._match_r_paren() 4449 return self.expression(exp.Schema, this=this, expressions=args) 4450 4451 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4452 return self._parse_column_def(self._parse_field(any_token=True)) 4453 4454 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4455 # column defs are not really columns, they're identifiers 4456 if isinstance(this, exp.Column): 4457 this = this.this 4458 4459 kind = self._parse_types(schema=True) 4460 4461 if self._match_text_seq("FOR", "ORDINALITY"): 4462 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4463 4464 constraints: t.List[exp.Expression] = [] 4465 4466 if not kind and self._match(TokenType.ALIAS): 4467 constraints.append( 4468 self.expression( 4469 exp.ComputedColumnConstraint, 4470 this=self._parse_conjunction(), 4471 persisted=self._match_text_seq("PERSISTED"), 4472 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4473 ) 4474 ) 4475 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4476 self._match(TokenType.ALIAS) 4477 constraints.append( 4478 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4479 ) 4480 4481 while True: 4482 constraint = self._parse_column_constraint() 4483 if not constraint: 4484 break 4485 constraints.append(constraint) 4486 4487 if not kind and not constraints: 4488 return this 4489 4490 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4491 4492 def _parse_auto_increment( 4493 self, 4494 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4495 start = None 4496 increment = None 4497 4498 if self._match(TokenType.L_PAREN, advance=False): 4499 args = self._parse_wrapped_csv(self._parse_bitwise) 4500 start = seq_get(args, 0) 4501 increment = seq_get(args, 1) 4502 elif self._match_text_seq("START"): 4503 start = self._parse_bitwise() 4504 self._match_text_seq("INCREMENT") 4505 increment = self._parse_bitwise() 4506 4507 if start and increment: 4508 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4509 4510 return exp.AutoIncrementColumnConstraint() 4511 4512 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4513 if not self._match_text_seq("REFRESH"): 4514 self._retreat(self._index - 1) 4515 return None 4516 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4517 4518 def _parse_compress(self) -> exp.CompressColumnConstraint: 4519 if self._match(TokenType.L_PAREN, advance=False): 4520 return self.expression( 4521 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4522 ) 4523 4524 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4525 4526 def _parse_generated_as_identity( 4527 self, 4528 ) -> ( 4529 exp.GeneratedAsIdentityColumnConstraint 4530 | exp.ComputedColumnConstraint 4531 | exp.GeneratedAsRowColumnConstraint 4532 ): 4533 if self._match_text_seq("BY", "DEFAULT"): 4534 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4535 this = self.expression( 4536 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4537 ) 4538 else: 4539 self._match_text_seq("ALWAYS") 4540 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4541 4542 self._match(TokenType.ALIAS) 4543 4544 if self._match_text_seq("ROW"): 4545 start = self._match_text_seq("START") 4546 if not start: 4547 self._match(TokenType.END) 4548 hidden = 
self._match_text_seq("HIDDEN") 4549 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4550 4551 identity = self._match_text_seq("IDENTITY") 4552 4553 if self._match(TokenType.L_PAREN): 4554 if self._match(TokenType.START_WITH): 4555 this.set("start", self._parse_bitwise()) 4556 if self._match_text_seq("INCREMENT", "BY"): 4557 this.set("increment", self._parse_bitwise()) 4558 if self._match_text_seq("MINVALUE"): 4559 this.set("minvalue", self._parse_bitwise()) 4560 if self._match_text_seq("MAXVALUE"): 4561 this.set("maxvalue", self._parse_bitwise()) 4562 4563 if self._match_text_seq("CYCLE"): 4564 this.set("cycle", True) 4565 elif self._match_text_seq("NO", "CYCLE"): 4566 this.set("cycle", False) 4567 4568 if not identity: 4569 this.set("expression", self._parse_bitwise()) 4570 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4571 args = self._parse_csv(self._parse_bitwise) 4572 this.set("start", seq_get(args, 0)) 4573 this.set("increment", seq_get(args, 1)) 4574 4575 self._match_r_paren() 4576 4577 return this 4578 4579 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4580 self._match_text_seq("LENGTH") 4581 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4582 4583 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4584 if self._match_text_seq("NULL"): 4585 return self.expression(exp.NotNullColumnConstraint) 4586 if self._match_text_seq("CASESPECIFIC"): 4587 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4588 if self._match_text_seq("FOR", "REPLICATION"): 4589 return self.expression(exp.NotForReplicationColumnConstraint) 4590 return None 4591 4592 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4593 if self._match(TokenType.CONSTRAINT): 4594 this = self._parse_id_var() 4595 else: 4596 this = None 4597 4598 if self._match_texts(self.CONSTRAINT_PARSERS): 4599 return self.expression( 4600 exp.ColumnConstraint, 4601 this=this, 4602 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4603 ) 4604 4605 return this 4606 4607 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4608 if not self._match(TokenType.CONSTRAINT): 4609 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4610 4611 return self.expression( 4612 exp.Constraint, 4613 this=self._parse_id_var(), 4614 expressions=self._parse_unnamed_constraints(), 4615 ) 4616 4617 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 4618 constraints = [] 4619 while True: 4620 constraint = self._parse_unnamed_constraint() or self._parse_function() 4621 if not constraint: 4622 break 4623 constraints.append(constraint) 4624 4625 return constraints 4626 4627 def _parse_unnamed_constraint( 4628 self, constraints: t.Optional[t.Collection[str]] = None 4629 ) -> t.Optional[exp.Expression]: 4630 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4631 constraints or self.CONSTRAINT_PARSERS 4632 ): 4633 return None 4634 4635 constraint = self._prev.text.upper() 4636 if constraint not in self.CONSTRAINT_PARSERS: 4637 self.raise_error(f"No parser found for schema constraint {constraint}.") 4638 4639 return self.CONSTRAINT_PARSERS[constraint](self) 4640 4641 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4642 self._match_text_seq("KEY") 4643 return self.expression( 4644 exp.UniqueColumnConstraint, 4645 this=self._parse_schema(self._parse_id_var(any_token=False)), 4646 
index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4647 on_conflict=self._parse_on_conflict(), 4648 ) 4649 4650 def _parse_key_constraint_options(self) -> t.List[str]: 4651 options = [] 4652 while True: 4653 if not self._curr: 4654 break 4655 4656 if self._match(TokenType.ON): 4657 action = None 4658 on = self._advance_any() and self._prev.text 4659 4660 if self._match_text_seq("NO", "ACTION"): 4661 action = "NO ACTION" 4662 elif self._match_text_seq("CASCADE"): 4663 action = "CASCADE" 4664 elif self._match_text_seq("RESTRICT"): 4665 action = "RESTRICT" 4666 elif self._match_pair(TokenType.SET, TokenType.NULL): 4667 action = "SET NULL" 4668 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4669 action = "SET DEFAULT" 4670 else: 4671 self.raise_error("Invalid key constraint") 4672 4673 options.append(f"ON {on} {action}") 4674 elif self._match_text_seq("NOT", "ENFORCED"): 4675 options.append("NOT ENFORCED") 4676 elif self._match_text_seq("DEFERRABLE"): 4677 options.append("DEFERRABLE") 4678 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4679 options.append("INITIALLY DEFERRED") 4680 elif self._match_text_seq("NORELY"): 4681 options.append("NORELY") 4682 elif self._match_text_seq("MATCH", "FULL"): 4683 options.append("MATCH FULL") 4684 else: 4685 break 4686 4687 return options 4688 4689 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4690 if match and not self._match(TokenType.REFERENCES): 4691 return None 4692 4693 expressions = None 4694 this = self._parse_table(schema=True) 4695 options = self._parse_key_constraint_options() 4696 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4697 4698 def _parse_foreign_key(self) -> exp.ForeignKey: 4699 expressions = self._parse_wrapped_id_vars() 4700 reference = self._parse_references() 4701 options = {} 4702 4703 while self._match(TokenType.ON): 4704 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4705 self.raise_error("Expected DELETE or UPDATE") 4706 4707 kind = self._prev.text.lower() 4708 4709 if self._match_text_seq("NO", "ACTION"): 4710 action = "NO ACTION" 4711 elif self._match(TokenType.SET): 4712 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4713 action = "SET " + self._prev.text.upper() 4714 else: 4715 self._advance() 4716 action = self._prev.text.upper() 4717 4718 options[kind] = action 4719 4720 return self.expression( 4721 exp.ForeignKey, 4722 expressions=expressions, 4723 reference=reference, 4724 **options, # type: ignore 4725 ) 4726 4727 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4728 return self._parse_field() 4729 4730 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4731 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4732 self._retreat(self._index - 1) 4733 return None 4734 4735 id_vars = self._parse_wrapped_id_vars() 4736 return self.expression( 4737 exp.PeriodForSystemTimeConstraint, 4738 this=seq_get(id_vars, 0), 4739 expression=seq_get(id_vars, 1), 4740 ) 4741 4742 def _parse_primary_key( 4743 self, wrapped_optional: bool = False, in_props: bool = False 4744 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4745 desc = ( 4746 self._match_set((TokenType.ASC, TokenType.DESC)) 4747 and self._prev.token_type == TokenType.DESC 4748 ) 4749 4750 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4751 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4752 4753 expressions = 
self._parse_wrapped_csv( 4754 self._parse_primary_key_part, optional=wrapped_optional 4755 ) 4756 options = self._parse_key_constraint_options() 4757 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4758 4759 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4760 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4761 4762 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4763 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4764 return this 4765 4766 bracket_kind = self._prev.token_type 4767 expressions = self._parse_csv( 4768 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4769 ) 4770 4771 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 4772 self.raise_error("Expected ]") 4773 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 4774 self.raise_error("Expected }") 4775 4776 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4777 if bracket_kind == TokenType.L_BRACE: 4778 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 4779 elif not this or this.name.upper() == "ARRAY": 4780 this = self.expression(exp.Array, expressions=expressions) 4781 else: 4782 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4783 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4784 4785 self._add_comments(this) 4786 return self._parse_bracket(this) 4787 4788 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4789 if self._match(TokenType.COLON): 4790 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4791 return this 4792 4793 def _parse_case(self) -> t.Optional[exp.Expression]: 4794 ifs = [] 4795 default = None 4796 4797 comments = self._prev_comments 4798 expression = self._parse_conjunction() 4799 4800 while self._match(TokenType.WHEN): 4801 this = self._parse_conjunction() 4802 self._match(TokenType.THEN) 4803 then = self._parse_conjunction() 4804 ifs.append(self.expression(exp.If, this=this, true=then)) 4805 4806 if self._match(TokenType.ELSE): 4807 default = self._parse_conjunction() 4808 4809 if not self._match(TokenType.END): 4810 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4811 default = exp.column("interval") 4812 else: 4813 self.raise_error("Expected END after CASE", self._prev) 4814 4815 return self.expression( 4816 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 4817 ) 4818 4819 def _parse_if(self) -> t.Optional[exp.Expression]: 4820 if self._match(TokenType.L_PAREN): 4821 args = self._parse_csv(self._parse_conjunction) 4822 this = self.validate_expression(exp.If.from_arg_list(args), args) 4823 self._match_r_paren() 4824 else: 4825 index = self._index - 1 4826 4827 if self.NO_PAREN_IF_COMMANDS and index == 0: 4828 return self._parse_as_command(self._prev) 4829 4830 condition = self._parse_conjunction() 4831 4832 if not condition: 4833 self._retreat(index) 4834 return None 4835 4836 self._match(TokenType.THEN) 4837 true = self._parse_conjunction() 4838 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4839 self._match(TokenType.END) 4840 this = self.expression(exp.If, this=condition, true=true, false=false) 4841 4842 return this 4843 4844 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 
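        # Added note (not in the original source): the NEXT keyword was already
        # consumed by the caller (via NO_PAREN_FUNCTION_PARSERS), so only the
        # trailing "VALUE FOR" keywords are checked here, and the retreat below
        # un-consumes NEXT when a sequence reference doesn't follow. The shape
        # being parsed is, e.g., T-SQL's NEXT VALUE FOR dbo.seq [OVER (ORDER BY ...)].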
4845 if not self._match_text_seq("VALUE", "FOR"): 4846 self._retreat(self._index - 1) 4847 return None 4848 4849 return self.expression( 4850 exp.NextValueFor, 4851 this=self._parse_column(), 4852 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4853 ) 4854 4855 def _parse_extract(self) -> exp.Extract: 4856 this = self._parse_function() or self._parse_var() or self._parse_type() 4857 4858 if self._match(TokenType.FROM): 4859 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4860 4861 if not self._match(TokenType.COMMA): 4862 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4863 4864 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4865 4866 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4867 this = self._parse_conjunction() 4868 4869 if not self._match(TokenType.ALIAS): 4870 if self._match(TokenType.COMMA): 4871 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4872 4873 self.raise_error("Expected AS after CAST") 4874 4875 fmt = None 4876 to = self._parse_types() 4877 4878 if self._match(TokenType.FORMAT): 4879 fmt_string = self._parse_string() 4880 fmt = self._parse_at_time_zone(fmt_string) 4881 4882 if not to: 4883 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4884 if to.this in exp.DataType.TEMPORAL_TYPES: 4885 this = self.expression( 4886 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4887 this=this, 4888 format=exp.Literal.string( 4889 format_time( 4890 fmt_string.this if fmt_string else "", 4891 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 4892 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 4893 ) 4894 ), 4895 ) 4896 4897 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4898 this.set("zone", fmt.args["zone"]) 4899 return this 4900 elif not to: 4901 self.raise_error("Expected TYPE after CAST") 4902 elif isinstance(to, exp.Identifier): 4903 to = exp.DataType.build(to.name, udt=True) 4904 elif to.this == exp.DataType.Type.CHAR: 4905 if self._match(TokenType.CHARACTER_SET): 4906 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4907 4908 return self.expression( 4909 exp.Cast if strict else exp.TryCast, 4910 this=this, 4911 to=to, 4912 format=fmt, 4913 safe=safe, 4914 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 4915 ) 4916 4917 def _parse_string_agg(self) -> exp.Expression: 4918 if self._match(TokenType.DISTINCT): 4919 args: t.List[t.Optional[exp.Expression]] = [ 4920 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4921 ] 4922 if self._match(TokenType.COMMA): 4923 args.extend(self._parse_csv(self._parse_conjunction)) 4924 else: 4925 args = self._parse_csv(self._parse_conjunction) # type: ignore 4926 4927 index = self._index 4928 if not self._match(TokenType.R_PAREN) and args: 4929 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4930 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4931 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4932 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4933 4934 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 
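        # (Added illustration: Postgres keeps the sort inside the call, as in
        # STRING_AGG(x, ',' ORDER BY y), while Oracle / T-SQL append
        # WITHIN GROUP (ORDER BY y) after it; both shapes end up in the same
        # exp.GroupConcat node here.)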
4935 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4936 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4937 if not self._match_text_seq("WITHIN", "GROUP"): 4938 self._retreat(index) 4939 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4940 4941 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4942 order = self._parse_order(this=seq_get(args, 0)) 4943 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4944 4945 def _parse_convert( 4946 self, strict: bool, safe: t.Optional[bool] = None 4947 ) -> t.Optional[exp.Expression]: 4948 this = self._parse_bitwise() 4949 4950 if self._match(TokenType.USING): 4951 to: t.Optional[exp.Expression] = self.expression( 4952 exp.CharacterSet, this=self._parse_var() 4953 ) 4954 elif self._match(TokenType.COMMA): 4955 to = self._parse_types() 4956 else: 4957 to = None 4958 4959 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4960 4961 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4962 """ 4963 There are generally two variants of the DECODE function: 4964 4965 - DECODE(bin, charset) 4966 - DECODE(expression, search, result [, search, result] ... [, default]) 4967 4968 The second variant will always be parsed into a CASE expression. Note that NULL 4969 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4970 instead of relying on pattern matching. 4971 """ 4972 args = self._parse_csv(self._parse_conjunction) 4973 4974 if len(args) < 3: 4975 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4976 4977 expression, *expressions = args 4978 if not expression: 4979 return None 4980 4981 ifs = [] 4982 for search, result in zip(expressions[::2], expressions[1::2]): 4983 if not search or not result: 4984 return None 4985 4986 if isinstance(search, exp.Literal): 4987 ifs.append( 4988 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4989 ) 4990 elif isinstance(search, exp.Null): 4991 ifs.append( 4992 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4993 ) 4994 else: 4995 cond = exp.or_( 4996 exp.EQ(this=expression.copy(), expression=search), 4997 exp.and_( 4998 exp.Is(this=expression.copy(), expression=exp.Null()), 4999 exp.Is(this=search.copy(), expression=exp.Null()), 5000 copy=False, 5001 ), 5002 copy=False, 5003 ) 5004 ifs.append(exp.If(this=cond, true=result)) 5005 5006 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5007 5008 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5009 self._match_text_seq("KEY") 5010 key = self._parse_column() 5011 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5012 self._match_text_seq("VALUE") 5013 value = self._parse_bitwise() 5014 5015 if not key and not value: 5016 return None 5017 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5018 5019 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5020 if not this or not self._match_text_seq("FORMAT", "JSON"): 5021 return this 5022 5023 return self.expression(exp.FormatJson, this=this) 5024 5025 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5026 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 5027 for value in values: 5028 if self._match_text_seq(value, "ON", on): 5029 return f"{value} ON {on}" 5030 5031 return None 5032 5033 @t.overload 5034 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5035 5036 @t.overload 5037 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 5038 5039 def _parse_json_object(self, agg=False): 5040 star = self._parse_star() 5041 expressions = ( 5042 [star] 5043 if star 5044 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5045 ) 5046 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5047 5048 unique_keys = None 5049 if self._match_text_seq("WITH", "UNIQUE"): 5050 unique_keys = True 5051 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5052 unique_keys = False 5053 5054 self._match_text_seq("KEYS") 5055 5056 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5057 self._parse_type() 5058 ) 5059 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5060 5061 return self.expression( 5062 exp.JSONObjectAgg if agg else exp.JSONObject, 5063 expressions=expressions, 5064 null_handling=null_handling, 5065 unique_keys=unique_keys, 5066 return_type=return_type, 5067 encoding=encoding, 5068 ) 5069 5070 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5071 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5072 if not self._match_text_seq("NESTED"): 5073 this = self._parse_id_var() 5074 kind = self._parse_types(allow_identifiers=False) 5075 nested = None 5076 else: 5077 this = None 5078 kind = None 5079 nested = True 5080 5081 path = self._match_text_seq("PATH") and self._parse_string() 5082 nested_schema = nested and self._parse_json_schema() 5083 5084 return self.expression( 5085 exp.JSONColumnDef, 5086 this=this, 5087 kind=kind, 5088 path=path, 5089 nested_schema=nested_schema, 5090 ) 5091 5092 def _parse_json_schema(self) -> exp.JSONSchema: 5093 self._match_text_seq("COLUMNS") 5094 return self.expression( 5095 exp.JSONSchema, 5096 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5097 ) 5098 5099 def _parse_json_table(self) -> exp.JSONTable: 5100 this = self._parse_format_json(self._parse_bitwise()) 5101 path = self._match(TokenType.COMMA) and self._parse_string() 5102 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5103 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5104 schema = self._parse_json_schema() 5105 5106 return exp.JSONTable( 5107 this=this, 5108 schema=schema, 5109 path=path, 5110 error_handling=error_handling, 5111 empty_handling=empty_handling, 5112 ) 5113 5114 def _parse_match_against(self) -> exp.MatchAgainst: 5115 expressions = self._parse_csv(self._parse_column) 5116 5117 self._match_text_seq(")", "AGAINST", "(") 5118 5119 this = self._parse_string() 5120 5121 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5122 modifier = "IN NATURAL LANGUAGE MODE" 5123 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5124 modifier = f"{modifier} WITH QUERY EXPANSION" 5125 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5126 modifier = "IN BOOLEAN MODE" 5127 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5128 modifier = "WITH QUERY EXPANSION" 5129 else: 5130 modifier = None 5131 5132 return self.expression( 5133 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5134 ) 5135 5136 # 
https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5137 def _parse_open_json(self) -> exp.OpenJSON: 5138 this = self._parse_bitwise() 5139 path = self._match(TokenType.COMMA) and self._parse_string() 5140 5141 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5142 this = self._parse_field(any_token=True) 5143 kind = self._parse_types() 5144 path = self._parse_string() 5145 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5146 5147 return self.expression( 5148 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5149 ) 5150 5151 expressions = None 5152 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5153 self._match_l_paren() 5154 expressions = self._parse_csv(_parse_open_json_column_def) 5155 5156 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5157 5158 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5159 args = self._parse_csv(self._parse_bitwise) 5160 5161 if self._match(TokenType.IN): 5162 return self.expression( 5163 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5164 ) 5165 5166 if haystack_first: 5167 haystack = seq_get(args, 0) 5168 needle = seq_get(args, 1) 5169 else: 5170 needle = seq_get(args, 0) 5171 haystack = seq_get(args, 1) 5172 5173 return self.expression( 5174 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5175 ) 5176 5177 def _parse_predict(self) -> exp.Predict: 5178 self._match_text_seq("MODEL") 5179 this = self._parse_table() 5180 5181 self._match(TokenType.COMMA) 5182 self._match_text_seq("TABLE") 5183 5184 return self.expression( 5185 exp.Predict, 5186 this=this, 5187 expression=self._parse_table(), 5188 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5189 ) 5190 5191 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5192 args = self._parse_csv(self._parse_table) 5193 return exp.JoinHint(this=func_name.upper(), expressions=args) 5194 5195 def _parse_substring(self) -> exp.Substring: 5196 # Postgres supports the form: substring(string [from int] [for int]) 5197 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5198 5199 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5200 5201 if self._match(TokenType.FROM): 5202 args.append(self._parse_bitwise()) 5203 if self._match(TokenType.FOR): 5204 args.append(self._parse_bitwise()) 5205 5206 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5207 5208 def _parse_trim(self) -> exp.Trim: 5209 # https://www.w3resource.com/sql/character-functions/trim.php 5210 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5211 5212 position = None 5213 collation = None 5214 expression = None 5215 5216 if self._match_texts(self.TRIM_TYPES): 5217 position = self._prev.text.upper() 5218 5219 this = self._parse_bitwise() 5220 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5221 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5222 expression = self._parse_bitwise() 5223 5224 if invert_order: 5225 this, expression = expression, this 5226 5227 if self._match(TokenType.COLLATE): 5228 collation = self._parse_bitwise() 5229 5230 return self.expression( 5231 exp.Trim, this=this, position=position, expression=expression, collation=collation 5232 ) 5233 5234 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5235 return self._match(TokenType.WINDOW) and 
self._parse_csv(self._parse_named_window) 5236 5237 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5238 return self._parse_window(self._parse_id_var(), alias=True) 5239 5240 def _parse_respect_or_ignore_nulls( 5241 self, this: t.Optional[exp.Expression] 5242 ) -> t.Optional[exp.Expression]: 5243 if self._match_text_seq("IGNORE", "NULLS"): 5244 return self.expression(exp.IgnoreNulls, this=this) 5245 if self._match_text_seq("RESPECT", "NULLS"): 5246 return self.expression(exp.RespectNulls, this=this) 5247 return this 5248 5249 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5250 if self._match(TokenType.HAVING): 5251 self._match_texts(("MAX", "MIN")) 5252 max = self._prev.text.upper() != "MIN" 5253 return self.expression( 5254 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5255 ) 5256 5257 return this 5258 5259 def _parse_window( 5260 self, this: t.Optional[exp.Expression], alias: bool = False 5261 ) -> t.Optional[exp.Expression]: 5262 func = this 5263 comments = func.comments if isinstance(func, exp.Expression) else None 5264 5265 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5266 self._match(TokenType.WHERE) 5267 this = self.expression( 5268 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5269 ) 5270 self._match_r_paren() 5271 5272 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5273 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5274 if self._match_text_seq("WITHIN", "GROUP"): 5275 order = self._parse_wrapped(self._parse_order) 5276 this = self.expression(exp.WithinGroup, this=this, expression=order) 5277 5278 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5279 # Some dialects choose to implement and some do not. 5280 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5281 5282 # There is some code above in _parse_lambda that handles 5283 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5284 5285 # The below changes handle 5286 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5287 5288 # Oracle allows both formats 5289 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5290 # and Snowflake chose to do the same for familiarity 5291 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5292 if isinstance(this, exp.AggFunc): 5293 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5294 5295 if ignore_respect and ignore_respect is not this: 5296 ignore_respect.replace(ignore_respect.this) 5297 this = self.expression(ignore_respect.__class__, this=this) 5298 5299 this = self._parse_respect_or_ignore_nulls(this) 5300 5301 # bigquery select from window x AS (partition by ...) 
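        # (Added illustration: with alias=True this method parses a WINDOW
        # clause entry rather than an OVER clause, e.g. the "w AS (PARTITION BY y)"
        # part of SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y),
        # reached through _parse_named_window above.)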
5302 if alias: 5303 over = None 5304 self._match(TokenType.ALIAS) 5305 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5306 return this 5307 else: 5308 over = self._prev.text.upper() 5309 5310 if comments: 5311 func.comments = None # type: ignore 5312 5313 if not self._match(TokenType.L_PAREN): 5314 return self.expression( 5315 exp.Window, 5316 comments=comments, 5317 this=this, 5318 alias=self._parse_id_var(False), 5319 over=over, 5320 ) 5321 5322 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5323 5324 first = self._match(TokenType.FIRST) 5325 if self._match_text_seq("LAST"): 5326 first = False 5327 5328 partition, order = self._parse_partition_and_order() 5329 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5330 5331 if kind: 5332 self._match(TokenType.BETWEEN) 5333 start = self._parse_window_spec() 5334 self._match(TokenType.AND) 5335 end = self._parse_window_spec() 5336 5337 spec = self.expression( 5338 exp.WindowSpec, 5339 kind=kind, 5340 start=start["value"], 5341 start_side=start["side"], 5342 end=end["value"], 5343 end_side=end["side"], 5344 ) 5345 else: 5346 spec = None 5347 5348 self._match_r_paren() 5349 5350 window = self.expression( 5351 exp.Window, 5352 comments=comments, 5353 this=this, 5354 partition_by=partition, 5355 order=order, 5356 spec=spec, 5357 alias=window_alias, 5358 over=over, 5359 first=first, 5360 ) 5361 5362 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5363 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5364 return self._parse_window(window, alias=alias) 5365 5366 return window 5367 5368 def _parse_partition_and_order( 5369 self, 5370 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5371 return self._parse_partition_by(), self._parse_order() 5372 5373 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5374 self._match(TokenType.BETWEEN) 5375 5376 return { 5377 "value": ( 5378 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5379 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5380 or self._parse_bitwise() 5381 ), 5382 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5383 } 5384 5385 def _parse_alias( 5386 self, this: t.Optional[exp.Expression], explicit: bool = False 5387 ) -> t.Optional[exp.Expression]: 5388 any_token = self._match(TokenType.ALIAS) 5389 comments = self._prev_comments 5390 5391 if explicit and not any_token: 5392 return this 5393 5394 if self._match(TokenType.L_PAREN): 5395 aliases = self.expression( 5396 exp.Aliases, 5397 comments=comments, 5398 this=this, 5399 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5400 ) 5401 self._match_r_paren(aliases) 5402 return aliases 5403 5404 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5405 self.STRING_ALIASES and self._parse_string_as_identifier() 5406 ) 5407 5408 if alias: 5409 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5410 column = this.this 5411 5412 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5413 if not this.comments and column and column.comments: 5414 this.comments = column.comments 5415 column.comments = None 5416 5417 return this 5418 5419 def _parse_id_var( 5420 self, 5421 any_token: bool = True, 5422 tokens: t.Optional[t.Collection[TokenType]] = None, 5423 ) -> t.Optional[exp.Expression]: 5424 identifier = self._parse_identifier() 5425 5426 if identifier: 5427 return identifier 5428 
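# Fall back to treating the next token as an identifier, either because any
# token is acceptable here (any_token=True) or because it is a keyword that
# can double as a name (ID_VAR_TOKENS).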
5429 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 5430 quoted = self._prev.token_type == TokenType.STRING 5431 return exp.Identifier(this=self._prev.text, quoted=quoted) 5432 5433 return None 5434 5435 def _parse_string(self) -> t.Optional[exp.Expression]: 5436 if self._match_set(self.STRING_PARSERS): 5437 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5438 return self._parse_placeholder() 5439 5440 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5441 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5442 5443 def _parse_number(self) -> t.Optional[exp.Expression]: 5444 if self._match_set(self.NUMERIC_PARSERS): 5445 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5446 return self._parse_placeholder() 5447 5448 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5449 if self._match(TokenType.IDENTIFIER): 5450 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5451 return self._parse_placeholder() 5452 5453 def _parse_var( 5454 self, 5455 any_token: bool = False, 5456 tokens: t.Optional[t.Collection[TokenType]] = None, 5457 upper: bool = False, 5458 ) -> t.Optional[exp.Expression]: 5459 if ( 5460 (any_token and self._advance_any()) 5461 or self._match(TokenType.VAR) 5462 or (self._match_set(tokens) if tokens else False) 5463 ): 5464 return self.expression( 5465 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5466 ) 5467 return self._parse_placeholder() 5468 5469 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5470 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5471 self._advance() 5472 return self._prev 5473 return None 5474 5475 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5476 return self._parse_var() or self._parse_string() 5477 5478 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5479 return self._parse_primary() or self._parse_var(any_token=True) 5480 5481 def _parse_null(self) -> t.Optional[exp.Expression]: 5482 if self._match_set(self.NULL_TOKENS): 5483 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5484 return self._parse_placeholder() 5485 5486 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5487 if self._match(TokenType.TRUE): 5488 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5489 if self._match(TokenType.FALSE): 5490 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5491 return self._parse_placeholder() 5492 5493 def _parse_star(self) -> t.Optional[exp.Expression]: 5494 if self._match(TokenType.STAR): 5495 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5496 return self._parse_placeholder() 5497 5498 def _parse_parameter(self) -> exp.Parameter: 5499 self._match(TokenType.L_BRACE) 5500 this = self._parse_identifier() or self._parse_primary_or_var() 5501 expression = self._match(TokenType.COLON) and ( 5502 self._parse_identifier() or self._parse_primary_or_var() 5503 ) 5504 self._match(TokenType.R_BRACE) 5505 return self.expression(exp.Parameter, this=this, expression=expression) 5506 5507 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5508 if self._match_set(self.PLACEHOLDER_PARSERS): 5509 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5510 if placeholder: 5511 return placeholder 5512 self._advance(-1) 5513 return None 5514 5515 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 
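# Parses the star modifier `* EXCEPT (a, b)` (or a single bare column) and
# returns the list of excluded columns; see the STAR entry of PRIMARY_PARSERS.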
5516 if not self._match(TokenType.EXCEPT): 5517 return None 5518 if self._match(TokenType.L_PAREN, advance=False): 5519 return self._parse_wrapped_csv(self._parse_column) 5520 5521 except_column = self._parse_column() 5522 return [except_column] if except_column else None 5523 5524 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5525 if not self._match(TokenType.REPLACE): 5526 return None 5527 if self._match(TokenType.L_PAREN, advance=False): 5528 return self._parse_wrapped_csv(self._parse_expression) 5529 5530 replace_expression = self._parse_expression() 5531 return [replace_expression] if replace_expression else None 5532 5533 def _parse_csv( 5534 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5535 ) -> t.List[exp.Expression]: 5536 parse_result = parse_method() 5537 items = [parse_result] if parse_result is not None else [] 5538 5539 while self._match(sep): 5540 self._add_comments(parse_result) 5541 parse_result = parse_method() 5542 if parse_result is not None: 5543 items.append(parse_result) 5544 5545 return items 5546 5547 def _parse_tokens( 5548 self, parse_method: t.Callable, expressions: t.Dict 5549 ) -> t.Optional[exp.Expression]: 5550 this = parse_method() 5551 5552 while self._match_set(expressions): 5553 this = self.expression( 5554 expressions[self._prev.token_type], 5555 this=this, 5556 comments=self._prev_comments, 5557 expression=parse_method(), 5558 ) 5559 5560 return this 5561 5562 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5563 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5564 5565 def _parse_wrapped_csv( 5566 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5567 ) -> t.List[exp.Expression]: 5568 return self._parse_wrapped( 5569 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5570 ) 5571 5572 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5573 wrapped = self._match(TokenType.L_PAREN) 5574 if not wrapped and not optional: 5575 self.raise_error("Expecting (") 5576 parse_result = parse_method() 5577 if wrapped: 5578 self._match_r_paren() 5579 return parse_result 5580 5581 def _parse_expressions(self) -> t.List[exp.Expression]: 5582 return self._parse_csv(self._parse_expression) 5583 5584 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5585 return self._parse_select() or self._parse_set_operations( 5586 self._parse_expression() if alias else self._parse_conjunction() 5587 ) 5588 5589 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5590 return self._parse_query_modifiers( 5591 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5592 ) 5593 5594 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5595 this = None 5596 if self._match_texts(self.TRANSACTION_KIND): 5597 this = self._prev.text 5598 5599 self._match_texts(("TRANSACTION", "WORK")) 5600 5601 modes = [] 5602 while True: 5603 mode = [] 5604 while self._match(TokenType.VAR): 5605 mode.append(self._prev.text) 5606 5607 if mode: 5608 modes.append(" ".join(mode)) 5609 if not self._match(TokenType.COMMA): 5610 break 5611 5612 return self.expression(exp.Transaction, this=this, modes=modes) 5613 5614 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5615 chain = None 5616 savepoint = None 5617 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5618 5619 self._match_texts(("TRANSACTION", "WORK")) 5620 5621 if 
self._match_text_seq("TO"): 5622 self._match_text_seq("SAVEPOINT") 5623 savepoint = self._parse_id_var() 5624 5625 if self._match(TokenType.AND): 5626 chain = not self._match_text_seq("NO") 5627 self._match_text_seq("CHAIN") 5628 5629 if is_rollback: 5630 return self.expression(exp.Rollback, savepoint=savepoint) 5631 5632 return self.expression(exp.Commit, chain=chain) 5633 5634 def _parse_refresh(self) -> exp.Refresh: 5635 self._match(TokenType.TABLE) 5636 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5637 5638 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5639 if not self._match_text_seq("ADD"): 5640 return None 5641 5642 self._match(TokenType.COLUMN) 5643 exists_column = self._parse_exists(not_=True) 5644 expression = self._parse_field_def() 5645 5646 if expression: 5647 expression.set("exists", exists_column) 5648 5649 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5650 if self._match_texts(("FIRST", "AFTER")): 5651 position = self._prev.text 5652 column_position = self.expression( 5653 exp.ColumnPosition, this=self._parse_column(), position=position 5654 ) 5655 expression.set("position", column_position) 5656 5657 return expression 5658 5659 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5660 drop = self._match(TokenType.DROP) and self._parse_drop() 5661 if drop and not isinstance(drop, exp.Command): 5662 drop.set("kind", drop.args.get("kind", "COLUMN")) 5663 return drop 5664 5665 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5666 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5667 return self.expression( 5668 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5669 ) 5670 5671 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5672 index = self._index - 1 5673 5674 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5675 return self._parse_csv( 5676 lambda: self.expression( 5677 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5678 ) 5679 ) 5680 5681 self._retreat(index) 5682 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5683 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5684 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5685 5686 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5687 self._match(TokenType.COLUMN) 5688 column = self._parse_field(any_token=True) 5689 5690 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5691 return self.expression(exp.AlterColumn, this=column, drop=True) 5692 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5693 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5694 if self._match(TokenType.COMMENT): 5695 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5696 5697 self._match_text_seq("SET", "DATA") 5698 return self.expression( 5699 exp.AlterColumn, 5700 this=column, 5701 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5702 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5703 using=self._match(TokenType.USING) and self._parse_conjunction(), 5704 ) 5705 5706 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5707 index = self._index - 1 5708 5709 partition_exists = self._parse_exists() 5710 if self._match(TokenType.PARTITION, advance=False): 5711 return self._parse_csv(lambda: 
self._parse_drop_partition(exists=partition_exists)) 5712 5713 self._retreat(index) 5714 return self._parse_csv(self._parse_drop_column) 5715 5716 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5717 if self._match(TokenType.COLUMN): 5718 exists = self._parse_exists() 5719 old_column = self._parse_column() 5720 to = self._match_text_seq("TO") 5721 new_column = self._parse_column() 5722 5723 if old_column is None or to is None or new_column is None: 5724 return None 5725 5726 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5727 5728 self._match_text_seq("TO") 5729 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5730 5731 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5732 start = self._prev 5733 5734 if not self._match(TokenType.TABLE): 5735 return self._parse_as_command(start) 5736 5737 exists = self._parse_exists() 5738 only = self._match_text_seq("ONLY") 5739 this = self._parse_table(schema=True) 5740 5741 if self._next: 5742 self._advance() 5743 5744 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5745 if parser: 5746 actions = ensure_list(parser(self)) 5747 options = self._parse_csv(self._parse_property) 5748 5749 if not self._curr and actions: 5750 return self.expression( 5751 exp.AlterTable, 5752 this=this, 5753 exists=exists, 5754 actions=actions, 5755 only=only, 5756 options=options, 5757 ) 5758 5759 return self._parse_as_command(start) 5760 5761 def _parse_merge(self) -> exp.Merge: 5762 self._match(TokenType.INTO) 5763 target = self._parse_table() 5764 5765 if target and self._match(TokenType.ALIAS, advance=False): 5766 target.set("alias", self._parse_table_alias()) 5767 5768 self._match(TokenType.USING) 5769 using = self._parse_table() 5770 5771 self._match(TokenType.ON) 5772 on = self._parse_conjunction() 5773 5774 return self.expression( 5775 exp.Merge, 5776 this=target, 5777 using=using, 5778 on=on, 5779 expressions=self._parse_when_matched(), 5780 ) 5781 5782 def _parse_when_matched(self) -> t.List[exp.When]: 5783 whens = [] 5784 5785 while self._match(TokenType.WHEN): 5786 matched = not self._match(TokenType.NOT) 5787 self._match_text_seq("MATCHED") 5788 source = ( 5789 False 5790 if self._match_text_seq("BY", "TARGET") 5791 else self._match_text_seq("BY", "SOURCE") 5792 ) 5793 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5794 5795 self._match(TokenType.THEN) 5796 5797 if self._match(TokenType.INSERT): 5798 _this = self._parse_star() 5799 if _this: 5800 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5801 else: 5802 then = self.expression( 5803 exp.Insert, 5804 this=self._parse_value(), 5805 expression=self._match_text_seq("VALUES") and self._parse_value(), 5806 ) 5807 elif self._match(TokenType.UPDATE): 5808 expressions = self._parse_star() 5809 if expressions: 5810 then = self.expression(exp.Update, expressions=expressions) 5811 else: 5812 then = self.expression( 5813 exp.Update, 5814 expressions=self._match(TokenType.SET) 5815 and self._parse_csv(self._parse_equality), 5816 ) 5817 elif self._match(TokenType.DELETE): 5818 then = self.expression(exp.Var, this=self._prev.text) 5819 else: 5820 then = None 5821 5822 whens.append( 5823 self.expression( 5824 exp.When, 5825 matched=matched, 5826 source=source, 5827 condition=condition, 5828 then=then, 5829 ) 5830 ) 5831 return whens 5832 5833 def _parse_show(self) -> t.Optional[exp.Expression]: 5834 parser = 
self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5835 if parser: 5836 return parser(self) 5837 return self._parse_as_command(self._prev) 5838 5839 def _parse_set_item_assignment( 5840 self, kind: t.Optional[str] = None 5841 ) -> t.Optional[exp.Expression]: 5842 index = self._index 5843 5844 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5845 return self._parse_set_transaction(global_=kind == "GLOBAL") 5846 5847 left = self._parse_primary() or self._parse_id_var() 5848 assignment_delimiter = self._match_texts(("=", "TO")) 5849 5850 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5851 self._retreat(index) 5852 return None 5853 5854 right = self._parse_statement() or self._parse_id_var() 5855 this = self.expression(exp.EQ, this=left, expression=right) 5856 5857 return self.expression(exp.SetItem, this=this, kind=kind) 5858 5859 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5860 self._match_text_seq("TRANSACTION") 5861 characteristics = self._parse_csv( 5862 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5863 ) 5864 return self.expression( 5865 exp.SetItem, 5866 expressions=characteristics, 5867 kind="TRANSACTION", 5868 **{"global": global_}, # type: ignore 5869 ) 5870 5871 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5872 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5873 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5874 5875 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5876 index = self._index 5877 set_ = self.expression( 5878 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5879 ) 5880 5881 if self._curr: 5882 self._retreat(index) 5883 return self._parse_as_command(self._prev) 5884 5885 return set_ 5886 5887 def _parse_var_from_options( 5888 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 5889 ) -> t.Optional[exp.Var]: 5890 start = self._curr 5891 if not start: 5892 return None 5893 5894 option = start.text.upper() 5895 continuations = options.get(option) 5896 5897 index = self._index 5898 self._advance() 5899 for keywords in continuations or []: 5900 if isinstance(keywords, str): 5901 keywords = (keywords,) 5902 5903 if self._match_text_seq(*keywords): 5904 option = f"{option} {' '.join(keywords)}" 5905 break 5906 else: 5907 if continuations or continuations is None: 5908 if raise_unmatched: 5909 self.raise_error(f"Unknown option {option}") 5910 5911 self._retreat(index) 5912 return None 5913 5914 return exp.var(option) 5915 5916 def _parse_as_command(self, start: Token) -> exp.Command: 5917 while self._curr: 5918 self._advance() 5919 text = self._find_sql(start, self._prev) 5920 size = len(start.text) 5921 self._warn_unsupported() 5922 return exp.Command(this=text[:size], expression=text[size:]) 5923 5924 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5925 settings = [] 5926 5927 self._match_l_paren() 5928 kind = self._parse_id_var() 5929 5930 if self._match(TokenType.L_PAREN): 5931 while True: 5932 key = self._parse_id_var() 5933 value = self._parse_primary() 5934 5935 if not key and value is None: 5936 break 5937 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5938 self._match(TokenType.R_PAREN) 5939 5940 self._match_r_paren() 5941 5942 return self.expression( 5943 exp.DictProperty, 5944 this=this, 5945 kind=kind.this if kind else None, 5946 settings=settings, 5947 ) 5948 5949 def 
_parse_dict_range(self, this: str) -> exp.DictRange: 5950 self._match_l_paren() 5951 has_min = self._match_text_seq("MIN") 5952 if has_min: 5953 min = self._parse_var() or self._parse_primary() 5954 self._match_text_seq("MAX") 5955 max = self._parse_var() or self._parse_primary() 5956 else: 5957 max = self._parse_var() or self._parse_primary() 5958 min = exp.Literal.number(0) 5959 self._match_r_paren() 5960 return self.expression(exp.DictRange, this=this, min=min, max=max) 5961 5962 def _parse_comprehension( 5963 self, this: t.Optional[exp.Expression] 5964 ) -> t.Optional[exp.Comprehension]: 5965 index = self._index 5966 expression = self._parse_column() 5967 if not self._match(TokenType.IN): 5968 self._retreat(index - 1) 5969 return None 5970 iterator = self._parse_column() 5971 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5972 return self.expression( 5973 exp.Comprehension, 5974 this=this, 5975 expression=expression, 5976 iterator=iterator, 5977 condition=condition, 5978 ) 5979 5980 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 5981 if self._match(TokenType.HEREDOC_STRING): 5982 return self.expression(exp.Heredoc, this=self._prev.text) 5983 5984 if not self._match_text_seq("$"): 5985 return None 5986 5987 tags = ["$"] 5988 tag_text = None 5989 5990 if self._is_connected(): 5991 self._advance() 5992 tags.append(self._prev.text.upper()) 5993 else: 5994 self.raise_error("No closing $ found") 5995 5996 if tags[-1] != "$": 5997 if self._is_connected() and self._match_text_seq("$"): 5998 tag_text = tags[-1] 5999 tags.append("$") 6000 else: 6001 self.raise_error("No closing $ found") 6002 6003 heredoc_start = self._curr 6004 6005 while self._curr: 6006 if self._match_text_seq(*tags, advance=False): 6007 this = self._find_sql(heredoc_start, self._prev) 6008 self._advance(len(tags)) 6009 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6010 6011 self._advance() 6012 6013 self.raise_error(f"No closing {''.join(tags)} found") 6014 return None 6015 6016 def _find_parser( 6017 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6018 ) -> t.Optional[t.Callable]: 6019 if not self._curr: 6020 return None 6021 6022 index = self._index 6023 this = [] 6024 while True: 6025 # The current token might be multiple words 6026 curr = self._curr.text.upper() 6027 key = curr.split(" ") 6028 this.append(curr) 6029 6030 self._advance() 6031 result, trie = in_trie(trie, key) 6032 if result == TrieResult.FAILED: 6033 break 6034 6035 if result == TrieResult.EXISTS: 6036 subparser = parsers[" ".join(this)] 6037 return subparser 6038 6039 self._retreat(index) 6040 return None 6041 6042 def _match(self, token_type, advance=True, expression=None): 6043 if not self._curr: 6044 return None 6045 6046 if self._curr.token_type == token_type: 6047 if advance: 6048 self._advance() 6049 self._add_comments(expression) 6050 return True 6051 6052 return None 6053 6054 def _match_set(self, types, advance=True): 6055 if not self._curr: 6056 return None 6057 6058 if self._curr.token_type in types: 6059 if advance: 6060 self._advance() 6061 return True 6062 6063 return None 6064 6065 def _match_pair(self, token_type_a, token_type_b, advance=True): 6066 if not self._curr or not self._next: 6067 return None 6068 6069 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6070 if advance: 6071 self._advance(2) 6072 return True 6073 6074 return None 6075 6076 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6077 if not 
self._match(TokenType.L_PAREN, expression=expression): 6078 self.raise_error("Expecting (") 6079 6080 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6081 if not self._match(TokenType.R_PAREN, expression=expression): 6082 self.raise_error("Expecting )") 6083 6084 def _match_texts(self, texts, advance=True): 6085 if self._curr and self._curr.text.upper() in texts: 6086 if advance: 6087 self._advance() 6088 return True 6089 return None 6090 6091 def _match_text_seq(self, *texts, advance=True): 6092 index = self._index 6093 for text in texts: 6094 if self._curr and self._curr.text.upper() == text: 6095 self._advance() 6096 else: 6097 self._retreat(index) 6098 return None 6099 6100 if not advance: 6101 self._retreat(index) 6102 6103 return True 6104 6105 def _replace_lambda( 6106 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6107 ) -> t.Optional[exp.Expression]: 6108 if not node: 6109 return node 6110 6111 for column in node.find_all(exp.Column): 6112 if column.parts[0].name in lambda_variables: 6113 dot_or_id = column.to_dot() if column.table else column.this 6114 parent = column.parent 6115 6116 while isinstance(parent, exp.Dot): 6117 if not isinstance(parent.parent, exp.Dot): 6118 parent.replace(dot_or_id) 6119 break 6120 parent = parent.parent 6121 else: 6122 if column is node: 6123 node = dot_or_id 6124 else: 6125 column.replace(dot_or_id) 6126 return node 6127 6128 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6129 start = self._prev 6130 6131 # Not to be confused with TRUNCATE(number, decimals) function call 6132 if self._match(TokenType.L_PAREN): 6133 self._retreat(self._index - 2) 6134 return self._parse_function() 6135 6136 # Clickhouse supports TRUNCATE DATABASE as well 6137 is_database = self._match(TokenType.DATABASE) 6138 6139 self._match(TokenType.TABLE) 6140 6141 exists = self._parse_exists(not_=False) 6142 6143 expressions = self._parse_csv( 6144 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6145 ) 6146 6147 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6148 6149 if self._match_text_seq("RESTART", "IDENTITY"): 6150 identity = "RESTART" 6151 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6152 identity = "CONTINUE" 6153 else: 6154 identity = None 6155 6156 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6157 option = self._prev.text 6158 else: 6159 option = None 6160 6161 partition = self._parse_partition() 6162 6163 # Fallback case 6164 if self._curr: 6165 return self._parse_as_command(start) 6166 6167 return self.expression( 6168 exp.TruncateTable, 6169 expressions=expressions, 6170 is_database=is_database, 6171 exists=exists, 6172 cluster=cluster, 6173 identity=identity, 6174 option=option, 6175 partition=partition, 6176 ) 6177 6178 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6179 this = self._parse_ordered(self._parse_opclass) 6180 6181 if not self._match(TokenType.WITH): 6182 return this 6183 6184 op = self._parse_var(any_token=True) 6185 6186 return self.expression(exp.WithOperator, this=this, op=op)
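A minimal sketch of how a few of the helpers above surface through the public API (assuming a recent sqlglot release; exact AST details can vary between versions):

import sqlglot
from sqlglot import exp

# _parse_trim: with a trim type and FROM, the operands are swapped so that the
# string being trimmed lands in `this` and the trim characters in `expression`.
trim = sqlglot.parse_one("SELECT TRIM(LEADING 'x' FROM col)").find(exp.Trim)
assert trim is not None and trim.args["position"] == "LEADING"

# _parse_position: POSITION(needle IN haystack) yields exp.StrPosition with
# `this` = haystack and `substr` = needle.
pos = sqlglot.parse_one("SELECT POSITION('a' IN col)").find(exp.StrPosition)
assert pos is not None and pos.args["substr"].this == "a"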
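In the same spirit, build_var_map (registered under VAR_MAP in Parser.FUNCTIONS) splits an alternating key/value argument list into parallel arrays. A sketch, assuming the Hive dialect routes MAP(...) through this builder, as recent releases do:

import sqlglot
from sqlglot import exp

# MAP('a', 1, 'b', 2) alternates keys and values; build_var_map collects them
# into the parallel `keys` and `values` arrays of an exp.VarMap node.
vm = sqlglot.parse_one("SELECT MAP('a', 1, 'b', 2)", read="hive").find(exp.VarMap)
assert vm is not None
assert [k.this for k in vm.args["keys"].expressions] == ["a", "b"]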
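build_logarithm resolves LOG's argument order per dialect (LOG_BASE_FIRST) and only falls back to exp.Ln for single-argument LOG when the dialect's parser sets LOG_DEFAULTS_TO_LN. A sketch against the default dialect:

import sqlglot
from sqlglot import exp

# By default the base comes first, so LOG(2, 8) keeps the base in `this`
# and the operand in `expression`.
log = sqlglot.parse_one("SELECT LOG(2, 8)").find(exp.Log)
assert log is not None
assert log.this.this == "2" and log.expression.this == "8"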
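build_extract_json_with_path normalizes the path argument through dialect.to_json_path, so the resulting node carries a structured JSON path rather than a raw string. A sketch:

import sqlglot
from sqlglot import exp

# The '$.a' literal is converted into a parsed JSON path expression.
je = sqlglot.parse_one("SELECT JSON_EXTRACT(col, '$.a')", read="mysql").find(exp.JSONExtract)
assert je is not None and je.expression is not None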
86class Parser(metaclass=_Parser): 87 """ 88 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 89 90 Args: 91 error_level: The desired error level. 92 Default: ErrorLevel.IMMEDIATE 93 error_message_context: The amount of context to capture from a query string when displaying 94 the error message (in number of characters). 95 Default: 100 96 max_errors: Maximum number of error messages to include in a raised ParseError. 97 This is only relevant if error_level is ErrorLevel.RAISE. 98 Default: 3 99 """ 100 101 FUNCTIONS: t.Dict[str, t.Callable] = { 102 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 103 "CONCAT": lambda args, dialect: exp.Concat( 104 expressions=args, 105 safe=not dialect.STRICT_STRING_CONCAT, 106 coalesce=dialect.CONCAT_COALESCE, 107 ), 108 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 109 expressions=args, 110 safe=not dialect.STRICT_STRING_CONCAT, 111 coalesce=dialect.CONCAT_COALESCE, 112 ), 113 "DATE_TO_DATE_STR": lambda args: exp.Cast( 114 this=seq_get(args, 0), 115 to=exp.DataType(this=exp.DataType.Type.TEXT), 116 ), 117 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 118 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 119 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 120 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 121 "LIKE": build_like, 122 "LOG": build_logarithm, 123 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 124 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 125 "MOD": lambda args: exp.Mod(this=seq_get(args, 0), expression=seq_get(args, 1)), 126 "TIME_TO_TIME_STR": lambda args: exp.Cast( 127 this=seq_get(args, 0), 128 to=exp.DataType(this=exp.DataType.Type.TEXT), 129 ), 130 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 131 this=exp.Cast( 132 this=seq_get(args, 0), 133 to=exp.DataType(this=exp.DataType.Type.TEXT), 134 ), 135 start=exp.Literal.number(1), 136 length=exp.Literal.number(10), 137 ), 138 "VAR_MAP": build_var_map, 139 } 140 141 NO_PAREN_FUNCTIONS = { 142 TokenType.CURRENT_DATE: exp.CurrentDate, 143 TokenType.CURRENT_DATETIME: exp.CurrentDate, 144 TokenType.CURRENT_TIME: exp.CurrentTime, 145 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 146 TokenType.CURRENT_USER: exp.CurrentUser, 147 } 148 149 STRUCT_TYPE_TOKENS = { 150 TokenType.NESTED, 151 TokenType.OBJECT, 152 TokenType.STRUCT, 153 } 154 155 NESTED_TYPE_TOKENS = { 156 TokenType.ARRAY, 157 TokenType.LOWCARDINALITY, 158 TokenType.MAP, 159 TokenType.NULLABLE, 160 *STRUCT_TYPE_TOKENS, 161 } 162 163 ENUM_TYPE_TOKENS = { 164 TokenType.ENUM, 165 TokenType.ENUM8, 166 TokenType.ENUM16, 167 } 168 169 AGGREGATE_TYPE_TOKENS = { 170 TokenType.AGGREGATEFUNCTION, 171 TokenType.SIMPLEAGGREGATEFUNCTION, 172 } 173 174 TYPE_TOKENS = { 175 TokenType.BIT, 176 TokenType.BOOLEAN, 177 TokenType.TINYINT, 178 TokenType.UTINYINT, 179 TokenType.SMALLINT, 180 TokenType.USMALLINT, 181 TokenType.INT, 182 TokenType.UINT, 183 TokenType.BIGINT, 184 TokenType.UBIGINT, 185 TokenType.INT128, 186 TokenType.UINT128, 187 TokenType.INT256, 188 TokenType.UINT256, 189 TokenType.MEDIUMINT, 190 TokenType.UMEDIUMINT, 191 TokenType.FIXEDSTRING, 192 TokenType.FLOAT, 193 TokenType.DOUBLE, 194 TokenType.CHAR, 195 TokenType.NCHAR, 196 TokenType.VARCHAR, 197 TokenType.NVARCHAR, 198 TokenType.BPCHAR, 199 TokenType.TEXT, 200 TokenType.MEDIUMTEXT, 201 TokenType.LONGTEXT, 202 
TokenType.MEDIUMBLOB, 203 TokenType.LONGBLOB, 204 TokenType.BINARY, 205 TokenType.VARBINARY, 206 TokenType.JSON, 207 TokenType.JSONB, 208 TokenType.INTERVAL, 209 TokenType.TINYBLOB, 210 TokenType.TINYTEXT, 211 TokenType.TIME, 212 TokenType.TIMETZ, 213 TokenType.TIMESTAMP, 214 TokenType.TIMESTAMP_S, 215 TokenType.TIMESTAMP_MS, 216 TokenType.TIMESTAMP_NS, 217 TokenType.TIMESTAMPTZ, 218 TokenType.TIMESTAMPLTZ, 219 TokenType.DATETIME, 220 TokenType.DATETIME64, 221 TokenType.DATE, 222 TokenType.DATE32, 223 TokenType.INT4RANGE, 224 TokenType.INT4MULTIRANGE, 225 TokenType.INT8RANGE, 226 TokenType.INT8MULTIRANGE, 227 TokenType.NUMRANGE, 228 TokenType.NUMMULTIRANGE, 229 TokenType.TSRANGE, 230 TokenType.TSMULTIRANGE, 231 TokenType.TSTZRANGE, 232 TokenType.TSTZMULTIRANGE, 233 TokenType.DATERANGE, 234 TokenType.DATEMULTIRANGE, 235 TokenType.DECIMAL, 236 TokenType.UDECIMAL, 237 TokenType.BIGDECIMAL, 238 TokenType.UUID, 239 TokenType.GEOGRAPHY, 240 TokenType.GEOMETRY, 241 TokenType.HLLSKETCH, 242 TokenType.HSTORE, 243 TokenType.PSEUDO_TYPE, 244 TokenType.SUPER, 245 TokenType.SERIAL, 246 TokenType.SMALLSERIAL, 247 TokenType.BIGSERIAL, 248 TokenType.XML, 249 TokenType.YEAR, 250 TokenType.UNIQUEIDENTIFIER, 251 TokenType.USERDEFINED, 252 TokenType.MONEY, 253 TokenType.SMALLMONEY, 254 TokenType.ROWVERSION, 255 TokenType.IMAGE, 256 TokenType.VARIANT, 257 TokenType.OBJECT, 258 TokenType.OBJECT_IDENTIFIER, 259 TokenType.INET, 260 TokenType.IPADDRESS, 261 TokenType.IPPREFIX, 262 TokenType.IPV4, 263 TokenType.IPV6, 264 TokenType.UNKNOWN, 265 TokenType.NULL, 266 TokenType.NAME, 267 *ENUM_TYPE_TOKENS, 268 *NESTED_TYPE_TOKENS, 269 *AGGREGATE_TYPE_TOKENS, 270 } 271 272 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 273 TokenType.BIGINT: TokenType.UBIGINT, 274 TokenType.INT: TokenType.UINT, 275 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 276 TokenType.SMALLINT: TokenType.USMALLINT, 277 TokenType.TINYINT: TokenType.UTINYINT, 278 TokenType.DECIMAL: TokenType.UDECIMAL, 279 } 280 281 SUBQUERY_PREDICATES = { 282 TokenType.ANY: exp.Any, 283 TokenType.ALL: exp.All, 284 TokenType.EXISTS: exp.Exists, 285 TokenType.SOME: exp.Any, 286 } 287 288 RESERVED_TOKENS = { 289 *Tokenizer.SINGLE_TOKENS.values(), 290 TokenType.SELECT, 291 } 292 293 DB_CREATABLES = { 294 TokenType.DATABASE, 295 TokenType.SCHEMA, 296 TokenType.TABLE, 297 TokenType.VIEW, 298 TokenType.MODEL, 299 TokenType.DICTIONARY, 300 TokenType.SEQUENCE, 301 TokenType.STORAGE_INTEGRATION, 302 } 303 304 CREATABLES = { 305 TokenType.COLUMN, 306 TokenType.CONSTRAINT, 307 TokenType.FUNCTION, 308 TokenType.INDEX, 309 TokenType.PROCEDURE, 310 TokenType.FOREIGN_KEY, 311 *DB_CREATABLES, 312 } 313 314 # Tokens that can represent identifiers 315 ID_VAR_TOKENS = { 316 TokenType.VAR, 317 TokenType.ANTI, 318 TokenType.APPLY, 319 TokenType.ASC, 320 TokenType.ASOF, 321 TokenType.AUTO_INCREMENT, 322 TokenType.BEGIN, 323 TokenType.BPCHAR, 324 TokenType.CACHE, 325 TokenType.CASE, 326 TokenType.COLLATE, 327 TokenType.COMMAND, 328 TokenType.COMMENT, 329 TokenType.COMMIT, 330 TokenType.CONSTRAINT, 331 TokenType.DEFAULT, 332 TokenType.DELETE, 333 TokenType.DESC, 334 TokenType.DESCRIBE, 335 TokenType.DICTIONARY, 336 TokenType.DIV, 337 TokenType.END, 338 TokenType.EXECUTE, 339 TokenType.ESCAPE, 340 TokenType.FALSE, 341 TokenType.FIRST, 342 TokenType.FILTER, 343 TokenType.FINAL, 344 TokenType.FORMAT, 345 TokenType.FULL, 346 TokenType.IS, 347 TokenType.ISNULL, 348 TokenType.INTERVAL, 349 TokenType.KEEP, 350 TokenType.KILL, 351 TokenType.LEFT, 352 TokenType.LOAD, 353 TokenType.MERGE, 354 TokenType.NATURAL, 355 
TokenType.NEXT, 356 TokenType.OFFSET, 357 TokenType.OPERATOR, 358 TokenType.ORDINALITY, 359 TokenType.OVERLAPS, 360 TokenType.OVERWRITE, 361 TokenType.PARTITION, 362 TokenType.PERCENT, 363 TokenType.PIVOT, 364 TokenType.PRAGMA, 365 TokenType.RANGE, 366 TokenType.RECURSIVE, 367 TokenType.REFERENCES, 368 TokenType.REFRESH, 369 TokenType.REPLACE, 370 TokenType.RIGHT, 371 TokenType.ROW, 372 TokenType.ROWS, 373 TokenType.SEMI, 374 TokenType.SET, 375 TokenType.SETTINGS, 376 TokenType.SHOW, 377 TokenType.TEMPORARY, 378 TokenType.TOP, 379 TokenType.TRUE, 380 TokenType.TRUNCATE, 381 TokenType.UNIQUE, 382 TokenType.UNPIVOT, 383 TokenType.UPDATE, 384 TokenType.USE, 385 TokenType.VOLATILE, 386 TokenType.WINDOW, 387 *CREATABLES, 388 *SUBQUERY_PREDICATES, 389 *TYPE_TOKENS, 390 *NO_PAREN_FUNCTIONS, 391 } 392 393 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 394 395 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 396 TokenType.ANTI, 397 TokenType.APPLY, 398 TokenType.ASOF, 399 TokenType.FULL, 400 TokenType.LEFT, 401 TokenType.LOCK, 402 TokenType.NATURAL, 403 TokenType.OFFSET, 404 TokenType.RIGHT, 405 TokenType.SEMI, 406 TokenType.WINDOW, 407 } 408 409 ALIAS_TOKENS = ID_VAR_TOKENS 410 411 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 412 413 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 414 415 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 416 417 FUNC_TOKENS = { 418 TokenType.COLLATE, 419 TokenType.COMMAND, 420 TokenType.CURRENT_DATE, 421 TokenType.CURRENT_DATETIME, 422 TokenType.CURRENT_TIMESTAMP, 423 TokenType.CURRENT_TIME, 424 TokenType.CURRENT_USER, 425 TokenType.FILTER, 426 TokenType.FIRST, 427 TokenType.FORMAT, 428 TokenType.GLOB, 429 TokenType.IDENTIFIER, 430 TokenType.INDEX, 431 TokenType.ISNULL, 432 TokenType.ILIKE, 433 TokenType.INSERT, 434 TokenType.LIKE, 435 TokenType.MERGE, 436 TokenType.OFFSET, 437 TokenType.PRIMARY_KEY, 438 TokenType.RANGE, 439 TokenType.REPLACE, 440 TokenType.RLIKE, 441 TokenType.ROW, 442 TokenType.UNNEST, 443 TokenType.VAR, 444 TokenType.LEFT, 445 TokenType.RIGHT, 446 TokenType.SEQUENCE, 447 TokenType.DATE, 448 TokenType.DATETIME, 449 TokenType.TABLE, 450 TokenType.TIMESTAMP, 451 TokenType.TIMESTAMPTZ, 452 TokenType.TRUNCATE, 453 TokenType.WINDOW, 454 TokenType.XOR, 455 *TYPE_TOKENS, 456 *SUBQUERY_PREDICATES, 457 } 458 459 CONJUNCTION = { 460 TokenType.AND: exp.And, 461 TokenType.OR: exp.Or, 462 } 463 464 EQUALITY = { 465 TokenType.COLON_EQ: exp.PropertyEQ, 466 TokenType.EQ: exp.EQ, 467 TokenType.NEQ: exp.NEQ, 468 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 469 } 470 471 COMPARISON = { 472 TokenType.GT: exp.GT, 473 TokenType.GTE: exp.GTE, 474 TokenType.LT: exp.LT, 475 TokenType.LTE: exp.LTE, 476 } 477 478 BITWISE = { 479 TokenType.AMP: exp.BitwiseAnd, 480 TokenType.CARET: exp.BitwiseXor, 481 TokenType.PIPE: exp.BitwiseOr, 482 } 483 484 TERM = { 485 TokenType.DASH: exp.Sub, 486 TokenType.PLUS: exp.Add, 487 TokenType.MOD: exp.Mod, 488 TokenType.COLLATE: exp.Collate, 489 } 490 491 FACTOR = { 492 TokenType.DIV: exp.IntDiv, 493 TokenType.LR_ARROW: exp.Distance, 494 TokenType.SLASH: exp.Div, 495 TokenType.STAR: exp.Mul, 496 } 497 498 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 499 500 TIMES = { 501 TokenType.TIME, 502 TokenType.TIMETZ, 503 } 504 505 TIMESTAMPS = { 506 TokenType.TIMESTAMP, 507 TokenType.TIMESTAMPTZ, 508 TokenType.TIMESTAMPLTZ, 509 *TIMES, 510 } 511 512 SET_OPERATIONS = { 513 TokenType.UNION, 514 TokenType.INTERSECT, 515 TokenType.EXCEPT, 516 } 517 518 JOIN_METHODS = { 519 TokenType.ASOF, 520 TokenType.NATURAL, 521 
TokenType.POSITIONAL, 522 } 523 524 JOIN_SIDES = { 525 TokenType.LEFT, 526 TokenType.RIGHT, 527 TokenType.FULL, 528 } 529 530 JOIN_KINDS = { 531 TokenType.INNER, 532 TokenType.OUTER, 533 TokenType.CROSS, 534 TokenType.SEMI, 535 TokenType.ANTI, 536 } 537 538 JOIN_HINTS: t.Set[str] = set() 539 540 LAMBDAS = { 541 TokenType.ARROW: lambda self, expressions: self.expression( 542 exp.Lambda, 543 this=self._replace_lambda( 544 self._parse_conjunction(), 545 {node.name for node in expressions}, 546 ), 547 expressions=expressions, 548 ), 549 TokenType.FARROW: lambda self, expressions: self.expression( 550 exp.Kwarg, 551 this=exp.var(expressions[0].name), 552 expression=self._parse_conjunction(), 553 ), 554 } 555 556 COLUMN_OPERATORS = { 557 TokenType.DOT: None, 558 TokenType.DCOLON: lambda self, this, to: self.expression( 559 exp.Cast if self.STRICT_CAST else exp.TryCast, 560 this=this, 561 to=to, 562 ), 563 TokenType.ARROW: lambda self, this, path: self.expression( 564 exp.JSONExtract, 565 this=this, 566 expression=self.dialect.to_json_path(path), 567 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 568 ), 569 TokenType.DARROW: lambda self, this, path: self.expression( 570 exp.JSONExtractScalar, 571 this=this, 572 expression=self.dialect.to_json_path(path), 573 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 574 ), 575 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 576 exp.JSONBExtract, 577 this=this, 578 expression=path, 579 ), 580 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 581 exp.JSONBExtractScalar, 582 this=this, 583 expression=path, 584 ), 585 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 586 exp.JSONBContains, 587 this=this, 588 expression=key, 589 ), 590 } 591 592 EXPRESSION_PARSERS = { 593 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 594 exp.Column: lambda self: self._parse_column(), 595 exp.Condition: lambda self: self._parse_conjunction(), 596 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 597 exp.Expression: lambda self: self._parse_expression(), 598 exp.From: lambda self: self._parse_from(), 599 exp.Group: lambda self: self._parse_group(), 600 exp.Having: lambda self: self._parse_having(), 601 exp.Identifier: lambda self: self._parse_id_var(), 602 exp.Join: lambda self: self._parse_join(), 603 exp.Lambda: lambda self: self._parse_lambda(), 604 exp.Lateral: lambda self: self._parse_lateral(), 605 exp.Limit: lambda self: self._parse_limit(), 606 exp.Offset: lambda self: self._parse_offset(), 607 exp.Order: lambda self: self._parse_order(), 608 exp.Ordered: lambda self: self._parse_ordered(), 609 exp.Properties: lambda self: self._parse_properties(), 610 exp.Qualify: lambda self: self._parse_qualify(), 611 exp.Returning: lambda self: self._parse_returning(), 612 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 613 exp.Table: lambda self: self._parse_table_parts(), 614 exp.TableAlias: lambda self: self._parse_table_alias(), 615 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 616 exp.Where: lambda self: self._parse_where(), 617 exp.Window: lambda self: self._parse_named_window(), 618 exp.With: lambda self: self._parse_with(), 619 "JOIN_TYPE": lambda self: self._parse_join_parts(), 620 } 621 622 STATEMENT_PARSERS = { 623 TokenType.ALTER: lambda self: self._parse_alter(), 624 TokenType.BEGIN: lambda self: self._parse_transaction(), 625 TokenType.CACHE: lambda self: self._parse_cache(), 626 TokenType.COMMENT: lambda self: 
self._parse_comment(), 627 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 628 TokenType.CREATE: lambda self: self._parse_create(), 629 TokenType.DELETE: lambda self: self._parse_delete(), 630 TokenType.DESC: lambda self: self._parse_describe(), 631 TokenType.DESCRIBE: lambda self: self._parse_describe(), 632 TokenType.DROP: lambda self: self._parse_drop(), 633 TokenType.INSERT: lambda self: self._parse_insert(), 634 TokenType.KILL: lambda self: self._parse_kill(), 635 TokenType.LOAD: lambda self: self._parse_load(), 636 TokenType.MERGE: lambda self: self._parse_merge(), 637 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 638 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 639 TokenType.REFRESH: lambda self: self._parse_refresh(), 640 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 641 TokenType.SET: lambda self: self._parse_set(), 642 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 643 TokenType.UNCACHE: lambda self: self._parse_uncache(), 644 TokenType.UPDATE: lambda self: self._parse_update(), 645 TokenType.USE: lambda self: self.expression( 646 exp.Use, 647 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 648 this=self._parse_table(schema=False), 649 ), 650 } 651 652 UNARY_PARSERS = { 653 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 654 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 655 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 656 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 657 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 658 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 659 } 660 661 STRING_PARSERS = { 662 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 663 exp.RawString, this=token.text 664 ), 665 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 666 exp.National, this=token.text 667 ), 668 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 669 TokenType.STRING: lambda self, token: self.expression( 670 exp.Literal, this=token.text, is_string=True 671 ), 672 TokenType.UNICODE_STRING: lambda self, token: self.expression( 673 exp.UnicodeString, 674 this=token.text, 675 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 676 ), 677 } 678 679 NUMERIC_PARSERS = { 680 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 681 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 682 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 683 TokenType.NUMBER: lambda self, token: self.expression( 684 exp.Literal, this=token.text, is_string=False 685 ), 686 } 687 688 PRIMARY_PARSERS = { 689 **STRING_PARSERS, 690 **NUMERIC_PARSERS, 691 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 692 TokenType.NULL: lambda self, _: self.expression(exp.Null), 693 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 694 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 695 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 696 TokenType.STAR: lambda self, _: self.expression( 697 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 
698 ), 699 } 700 701 PLACEHOLDER_PARSERS = { 702 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 703 TokenType.PARAMETER: lambda self: self._parse_parameter(), 704 TokenType.COLON: lambda self: ( 705 self.expression(exp.Placeholder, this=self._prev.text) 706 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 707 else None 708 ), 709 } 710 711 RANGE_PARSERS = { 712 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 713 TokenType.GLOB: binary_range_parser(exp.Glob), 714 TokenType.ILIKE: binary_range_parser(exp.ILike), 715 TokenType.IN: lambda self, this: self._parse_in(this), 716 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 717 TokenType.IS: lambda self, this: self._parse_is(this), 718 TokenType.LIKE: binary_range_parser(exp.Like), 719 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 720 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 721 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 722 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 723 } 724 725 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 726 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 727 "AUTO": lambda self: self._parse_auto_property(), 728 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 729 "BACKUP": lambda self: self.expression( 730 exp.BackupProperty, this=self._parse_var(any_token=True) 731 ), 732 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 733 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 734 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 735 "CHECKSUM": lambda self: self._parse_checksum(), 736 "CLUSTER BY": lambda self: self._parse_cluster(), 737 "CLUSTERED": lambda self: self._parse_clustered_by(), 738 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 739 exp.CollateProperty, **kwargs 740 ), 741 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 742 "CONTAINS": lambda self: self._parse_contains_property(), 743 "COPY": lambda self: self._parse_copy_property(), 744 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 745 "DEFINER": lambda self: self._parse_definer(), 746 "DETERMINISTIC": lambda self: self.expression( 747 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 748 ), 749 "DISTKEY": lambda self: self._parse_distkey(), 750 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 751 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 752 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 753 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 754 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 755 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 756 "FREESPACE": lambda self: self._parse_freespace(), 757 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 758 "HEAP": lambda self: self.expression(exp.HeapProperty), 759 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 760 "IMMUTABLE": lambda self: self.expression( 761 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 762 ), 763 "INHERITS": lambda self: self.expression( 764 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 765 ), 766 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 767 "JOURNAL": 
lambda self, **kwargs: self._parse_journal(**kwargs), 768 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 769 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 770 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 771 "LIKE": lambda self: self._parse_create_like(), 772 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 773 "LOCK": lambda self: self._parse_locking(), 774 "LOCKING": lambda self: self._parse_locking(), 775 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 776 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 777 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 778 "MODIFIES": lambda self: self._parse_modifies_property(), 779 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 780 "NO": lambda self: self._parse_no_property(), 781 "ON": lambda self: self._parse_on_property(), 782 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 783 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 784 "PARTITION": lambda self: self._parse_partitioned_of(), 785 "PARTITION BY": lambda self: self._parse_partitioned_by(), 786 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 787 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 788 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 789 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 790 "READS": lambda self: self._parse_reads_property(), 791 "REMOTE": lambda self: self._parse_remote_with_connection(), 792 "RETURNS": lambda self: self._parse_returns(), 793 "ROW": lambda self: self._parse_row(), 794 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 795 "SAMPLE": lambda self: self.expression( 796 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 797 ), 798 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 799 "SETTINGS": lambda self: self.expression( 800 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 801 ), 802 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 803 "SORTKEY": lambda self: self._parse_sortkey(), 804 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 805 "STABLE": lambda self: self.expression( 806 exp.StabilityProperty, this=exp.Literal.string("STABLE") 807 ), 808 "STORED": lambda self: self._parse_stored(), 809 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 810 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 811 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 812 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 813 "TO": lambda self: self._parse_to_table(), 814 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 815 "TRANSFORM": lambda self: self.expression( 816 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 817 ), 818 "TTL": lambda self: self._parse_ttl(), 819 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 820 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 821 "VOLATILE": lambda self: self._parse_volatile_property(), 822 "WITH": lambda self: self._parse_with_property(), 823 } 824 825 CONSTRAINT_PARSERS = { 826 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 827 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 828 
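# Each key is the leading keyword of a column constraint; the corresponding
# parser runs when that keyword is encountered in a column definition.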
"CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 829 "CHARACTER SET": lambda self: self.expression( 830 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 831 ), 832 "CHECK": lambda self: self.expression( 833 exp.CheckColumnConstraint, 834 this=self._parse_wrapped(self._parse_conjunction), 835 enforced=self._match_text_seq("ENFORCED"), 836 ), 837 "COLLATE": lambda self: self.expression( 838 exp.CollateColumnConstraint, this=self._parse_var() 839 ), 840 "COMMENT": lambda self: self.expression( 841 exp.CommentColumnConstraint, this=self._parse_string() 842 ), 843 "COMPRESS": lambda self: self._parse_compress(), 844 "CLUSTERED": lambda self: self.expression( 845 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 846 ), 847 "NONCLUSTERED": lambda self: self.expression( 848 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 849 ), 850 "DEFAULT": lambda self: self.expression( 851 exp.DefaultColumnConstraint, this=self._parse_bitwise() 852 ), 853 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 854 "EXCLUDE": lambda self: self.expression( 855 exp.ExcludeColumnConstraint, this=self._parse_index_params() 856 ), 857 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 858 "FORMAT": lambda self: self.expression( 859 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 860 ), 861 "GENERATED": lambda self: self._parse_generated_as_identity(), 862 "IDENTITY": lambda self: self._parse_auto_increment(), 863 "INLINE": lambda self: self._parse_inline(), 864 "LIKE": lambda self: self._parse_create_like(), 865 "NOT": lambda self: self._parse_not_constraint(), 866 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 867 "ON": lambda self: ( 868 self._match(TokenType.UPDATE) 869 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 870 ) 871 or self.expression(exp.OnProperty, this=self._parse_id_var()), 872 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 873 "PERIOD": lambda self: self._parse_period_for_system_time(), 874 "PRIMARY KEY": lambda self: self._parse_primary_key(), 875 "REFERENCES": lambda self: self._parse_references(match=False), 876 "TITLE": lambda self: self.expression( 877 exp.TitleColumnConstraint, this=self._parse_var_or_string() 878 ), 879 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 880 "UNIQUE": lambda self: self._parse_unique(), 881 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 882 "WITH": lambda self: self.expression( 883 exp.Properties, expressions=self._parse_wrapped_properties() 884 ), 885 } 886 887 ALTER_PARSERS = { 888 "ADD": lambda self: self._parse_alter_table_add(), 889 "ALTER": lambda self: self._parse_alter_table_alter(), 890 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 891 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 892 "DROP": lambda self: self._parse_alter_table_drop(), 893 "RENAME": lambda self: self._parse_alter_table_rename(), 894 } 895 896 SCHEMA_UNNAMED_CONSTRAINTS = { 897 "CHECK", 898 "EXCLUDE", 899 "FOREIGN KEY", 900 "LIKE", 901 "PERIOD", 902 "PRIMARY KEY", 903 "UNIQUE", 904 } 905 906 NO_PAREN_FUNCTION_PARSERS = { 907 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 908 "CASE": lambda self: self._parse_case(), 909 "IF": lambda self: 
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}
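    # Illustrative note (not in the upstream source): each QUERY_MODIFIER_PARSERS entry
    # returns an (arg_name, expression) pair, and _parse_query_modifiers() stores the
    # expression under arg_name on the current query node. For example, parsing
    # "SELECT x FROM t WHERE y > 0 LIMIT 1" dispatches on the WHERE and LIMIT tokens
    # and sets the "where" and "limit" args of the resulting exp.Select.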
    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False
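    # Illustrative note (not in the upstream source): the OPTIONS_TYPE tables above map
    # a leading keyword to the token sequences that may legally follow it, and are
    # consumed via _parse_var_from_options(). For instance, TRANSACTION_CHARACTERISTICS
    # accepts "ISOLATION LEVEL READ COMMITTED" and "READ ONLY", which lets
    # "SET TRANSACTION ISOLATION LEVEL READ COMMITTED" be parsed structurally instead of
    # falling back to a generic Command.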
    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of syntax trees produced by parsing into the target expression type(s).
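
        Example (illustrative, exercised here through the top-level API, which
        tokenizes the SQL and then delegates to this method):
            >>> import sqlglot
            >>> from sqlglot import exp
            >>> sqlglot.parse_one("x > 1 AND y < 2", into=exp.Condition).sql()
            'x > 1 AND y < 2'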
1178 """ 1179 errors = [] 1180 for expression_type in ensure_list(expression_types): 1181 parser = self.EXPRESSION_PARSERS.get(expression_type) 1182 if not parser: 1183 raise TypeError(f"No parser registered for {expression_type}") 1184 1185 try: 1186 return self._parse(parser, raw_tokens, sql) 1187 except ParseError as e: 1188 e.errors[0]["into_expression"] = expression_type 1189 errors.append(e) 1190 1191 raise ParseError( 1192 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1193 errors=merge_errors(errors), 1194 ) from errors[-1] 1195 1196 def _parse( 1197 self, 1198 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1199 raw_tokens: t.List[Token], 1200 sql: t.Optional[str] = None, 1201 ) -> t.List[t.Optional[exp.Expression]]: 1202 self.reset() 1203 self.sql = sql or "" 1204 1205 total = len(raw_tokens) 1206 chunks: t.List[t.List[Token]] = [[]] 1207 1208 for i, token in enumerate(raw_tokens): 1209 if token.token_type == TokenType.SEMICOLON: 1210 if i < total - 1: 1211 chunks.append([]) 1212 else: 1213 chunks[-1].append(token) 1214 1215 expressions = [] 1216 1217 for tokens in chunks: 1218 self._index = -1 1219 self._tokens = tokens 1220 self._advance() 1221 1222 expressions.append(parse_method(self)) 1223 1224 if self._index < len(self._tokens): 1225 self.raise_error("Invalid expression / Unexpected token") 1226 1227 self.check_errors() 1228 1229 return expressions 1230 1231 def check_errors(self) -> None: 1232 """Logs or raises any found errors, depending on the chosen error level setting.""" 1233 if self.error_level == ErrorLevel.WARN: 1234 for error in self.errors: 1235 logger.error(str(error)) 1236 elif self.error_level == ErrorLevel.RAISE and self.errors: 1237 raise ParseError( 1238 concat_messages(self.errors, self.max_errors), 1239 errors=merge_errors(self.errors), 1240 ) 1241 1242 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1243 """ 1244 Appends an error in the list of recorded errors or raises it, depending on the chosen 1245 error level setting. 1246 """ 1247 token = token or self._curr or self._prev or Token.string("") 1248 start = token.start 1249 end = token.end + 1 1250 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1251 highlight = self.sql[start:end] 1252 end_context = self.sql[end : end + self.error_message_context] 1253 1254 error = ParseError.new( 1255 f"{message}. Line {token.line}, Col: {token.col}.\n" 1256 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1257 description=message, 1258 line=token.line, 1259 col=token.col, 1260 start_context=start_context, 1261 highlight=highlight, 1262 end_context=end_context, 1263 ) 1264 1265 if self.error_level == ErrorLevel.IMMEDIATE: 1266 raise error 1267 1268 self.errors.append(error) 1269 1270 def expression( 1271 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1272 ) -> E: 1273 """ 1274 Creates a new, validated Expression. 1275 1276 Args: 1277 exp_class: The expression class to instantiate. 1278 comments: An optional list of comments to attach to the expression. 1279 kwargs: The arguments to set for the expression along with their respective values. 1280 1281 Returns: 1282 The target expression. 
1283 """ 1284 instance = exp_class(**kwargs) 1285 instance.add_comments(comments) if comments else self._add_comments(instance) 1286 return self.validate_expression(instance) 1287 1288 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1289 if expression and self._prev_comments: 1290 expression.add_comments(self._prev_comments) 1291 self._prev_comments = None 1292 1293 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1294 """ 1295 Validates an Expression, making sure that all its mandatory arguments are set. 1296 1297 Args: 1298 expression: The expression to validate. 1299 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1300 1301 Returns: 1302 The validated expression. 1303 """ 1304 if self.error_level != ErrorLevel.IGNORE: 1305 for error_message in expression.error_messages(args): 1306 self.raise_error(error_message) 1307 1308 return expression 1309 1310 def _find_sql(self, start: Token, end: Token) -> str: 1311 return self.sql[start.start : end.end + 1] 1312 1313 def _is_connected(self) -> bool: 1314 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1315 1316 def _advance(self, times: int = 1) -> None: 1317 self._index += times 1318 self._curr = seq_get(self._tokens, self._index) 1319 self._next = seq_get(self._tokens, self._index + 1) 1320 1321 if self._index > 0: 1322 self._prev = self._tokens[self._index - 1] 1323 self._prev_comments = self._prev.comments 1324 else: 1325 self._prev = None 1326 self._prev_comments = None 1327 1328 def _retreat(self, index: int) -> None: 1329 if index != self._index: 1330 self._advance(index - self._index) 1331 1332 def _warn_unsupported(self) -> None: 1333 if len(self._tokens) <= 1: 1334 return 1335 1336 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1337 # interested in emitting a warning for the one being currently processed. 1338 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1339 1340 logger.warning( 1341 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 
    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
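    # Illustrative note (not in the upstream source): _parse_statement() is the entry
    # point used by parse(). Dispatch order is: a registered STATEMENT_PARSERS token
    # (e.g. CREATE, DROP, INSERT), then tokenizer-level COMMANDS (parsed opaquely as
    # exp.Command with a warning), and finally a bare expression or SELECT followed by
    # query modifiers.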
    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
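    # Illustrative note (not in the upstream source): _parse_exists() consumes the
    # optional "IF [NOT] EXISTS" guard, so "DROP TABLE IF EXISTS t" sets exists=True on
    # the resulting exp.Drop, while "CREATE TABLE IF NOT EXISTS t (...)" sets
    # exists=True on the exp.Create (via _parse_exists(not_=True) below).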
    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq
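    # Illustrative note (not in the upstream source): _parse_sequence_properties()
    # incrementally fills an exp.SequenceProperties node, so a statement such as
    # "CREATE SEQUENCE s INCREMENT BY 2 START WITH 10 NOCYCLE" yields increment=2,
    # start=10, and a NOCYCLE entry in the options list (matched via the
    # CREATE_SEQUENCE table defined above).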
"default": self._match_text_seq("DEFAULT"), 1661 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1662 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1663 "after": self._match_text_seq("AFTER"), 1664 "minimum": self._match_texts(("MIN", "MINIMUM")), 1665 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1666 } 1667 1668 if self._match_texts(self.PROPERTY_PARSERS): 1669 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1670 try: 1671 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1672 except TypeError: 1673 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1674 1675 return None 1676 1677 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1678 return self._parse_wrapped_csv(self._parse_property) 1679 1680 def _parse_property(self) -> t.Optional[exp.Expression]: 1681 if self._match_texts(self.PROPERTY_PARSERS): 1682 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1683 1684 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1685 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1686 1687 if self._match_text_seq("COMPOUND", "SORTKEY"): 1688 return self._parse_sortkey(compound=True) 1689 1690 if self._match_text_seq("SQL", "SECURITY"): 1691 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1692 1693 index = self._index 1694 key = self._parse_column() 1695 1696 if not self._match(TokenType.EQ): 1697 self._retreat(index) 1698 return self._parse_sequence_properties() 1699 1700 return self.expression( 1701 exp.Property, 1702 this=key.to_dot() if isinstance(key, exp.Column) else key, 1703 value=self._parse_column() or self._parse_var(any_token=True), 1704 ) 1705 1706 def _parse_stored(self) -> exp.FileFormatProperty: 1707 self._match(TokenType.ALIAS) 1708 1709 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1710 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1711 1712 return self.expression( 1713 exp.FileFormatProperty, 1714 this=( 1715 self.expression( 1716 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1717 ) 1718 if input_format or output_format 1719 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1720 ), 1721 ) 1722 1723 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1724 self._match(TokenType.EQ) 1725 self._match(TokenType.ALIAS) 1726 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1727 1728 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1729 properties = [] 1730 while True: 1731 if before: 1732 prop = self._parse_property_before() 1733 else: 1734 prop = self._parse_property() 1735 if not prop: 1736 break 1737 for p in ensure_list(prop): 1738 properties.append(p) 1739 1740 if properties: 1741 return self.expression(exp.Properties, expressions=properties) 1742 1743 return None 1744 1745 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1746 return self.expression( 1747 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1748 ) 1749 1750 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1751 if self._index >= 2: 1752 pre_volatile_token = self._tokens[self._index - 2] 1753 else: 1754 pre_volatile_token = None 1755 1756 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1757 return 
    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )
    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )
    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None
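    # Illustrative note (not in the upstream source): CONTAINS SQL, READS SQL DATA,
    # MODIFIES SQL DATA and NO SQL are routine characteristics (e.g. in MySQL's
    # CREATE FUNCTION / CREATE PROCEDURE); all four normalize to a single
    # exp.SqlReadWriteProperty whose `this` preserves the original wording (NO SQL is
    # handled by _parse_no_property below).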
    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        extended = self._match_text_seq("EXTENDED")
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions
        )
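    # Illustrative note (not in the upstream source): _parse_insert() below covers
    # several dialect forms with one code path, e.g.
    #
    #     INSERT INTO t VALUES (1)                         -- plain insert
    #     INSERT OR REPLACE INTO t VALUES (1)              -- sqlite alternative action
    #     INSERT OVERWRITE TABLE t SELECT * FROM s         -- hive overwrite
    #     INSERT INTO t VALUES (1) ON CONFLICT DO NOTHING  -- postgres upsert
    #
    # The alternative/overwrite/conflict pieces land on the corresponding args of the
    # resulting exp.Insert node.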
    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = self._parse_table(schema=True) if not is_function else self._parse_function()

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()
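    # Illustrative note (not in the upstream source): the two ROW FORMAT variants below
    # follow Hive DDL, e.g.
    #
    #     ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
    #     ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n'
    #
    # producing exp.RowFormatSerdeProperty and exp.RowFormatDelimitedProperty
    # respectively.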
    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_properties()
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )
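    # Illustrative note (not in the upstream source): _parse_cache() below targets
    # Spark-style statements such as
    #
    #     CACHE LAZY TABLE t OPTIONS('storageLevel' = 'DISK_ONLY') AS SELECT * FROM s
    #
    # where LAZY, the single OPTIONS key/value pair, and the trailing SELECT map onto
    # the lazy/options/expression args of exp.Cache.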
    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        return self.expression(exp.Tuple, expressions=[self._parse_expression()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()
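    # Illustrative note (not in the upstream source): _parse_select() below is the core
    # of the query parser. Roughly: an optional WITH block is parsed first and attached
    # to the statement that follows; a leading FROM (DuckDB's "FROM t" shorthand) is
    # captured before SELECT; and parenthesized selects/tables recurse with nested=True,
    # returning early as an exp.Subquery so that set operators attach to the parent
    # node rather than to the subquery itself.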
    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )
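    # Illustrative note (not in the upstream source): when SUPPORTS_IMPLICIT_UNNEST is
    # set (e.g. Redshift), a comma "join" that actually references a column of an
    # earlier table, as in
    #
    #     SELECT 1 FROM y.z AS z, z.a
    #
    # is rewritten by _implicit_unnests_to_explicit() below into an explicit
    # exp.Unnest over that column, so downstream logic sees a regular UNNEST.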
table_as_column.replace(table_as_column.this) 2622 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2623 2624 table.replace(unnest) 2625 2626 refs.add(normalized_table.alias_or_name) 2627 2628 return this 2629 2630 def _parse_query_modifiers( 2631 self, this: t.Optional[exp.Expression] 2632 ) -> t.Optional[exp.Expression]: 2633 if isinstance(this, (exp.Query, exp.Table)): 2634 for join in iter(self._parse_join, None): 2635 this.append("joins", join) 2636 for lateral in iter(self._parse_lateral, None): 2637 this.append("laterals", lateral) 2638 2639 while True: 2640 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2641 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2642 key, expression = parser(self) 2643 2644 if expression: 2645 this.set(key, expression) 2646 if key == "limit": 2647 offset = expression.args.pop("offset", None) 2648 2649 if offset: 2650 offset = exp.Offset(expression=offset) 2651 this.set("offset", offset) 2652 2653 limit_by_expressions = expression.expressions 2654 expression.set("expressions", None) 2655 offset.set("expressions", limit_by_expressions) 2656 continue 2657 break 2658 2659 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2660 this = self._implicit_unnests_to_explicit(this) 2661 2662 return this 2663 2664 def _parse_hint(self) -> t.Optional[exp.Hint]: 2665 if self._match(TokenType.HINT): 2666 hints = [] 2667 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2668 hints.extend(hint) 2669 2670 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2671 self.raise_error("Expected */ after HINT") 2672 2673 return self.expression(exp.Hint, expressions=hints) 2674 2675 return None 2676 2677 def _parse_into(self) -> t.Optional[exp.Into]: 2678 if not self._match(TokenType.INTO): 2679 return None 2680 2681 temp = self._match(TokenType.TEMPORARY) 2682 unlogged = self._match_text_seq("UNLOGGED") 2683 self._match(TokenType.TABLE) 2684 2685 return self.expression( 2686 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2687 ) 2688 2689 def _parse_from( 2690 self, joins: bool = False, skip_from_token: bool = False 2691 ) -> t.Optional[exp.From]: 2692 if not skip_from_token and not self._match(TokenType.FROM): 2693 return None 2694 2695 return self.expression( 2696 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2697 ) 2698 2699 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2700 if not self._match(TokenType.MATCH_RECOGNIZE): 2701 return None 2702 2703 self._match_l_paren() 2704 2705 partition = self._parse_partition_by() 2706 order = self._parse_order() 2707 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2708 2709 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2710 rows = exp.var("ONE ROW PER MATCH") 2711 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2712 text = "ALL ROWS PER MATCH" 2713 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2714 text += " SHOW EMPTY MATCHES" 2715 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2716 text += " OMIT EMPTY MATCHES" 2717 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2718 text += " WITH UNMATCHED ROWS" 2719 rows = exp.var(text) 2720 else: 2721 rows = None 2722 2723 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2724 text = "AFTER MATCH SKIP" 2725 if self._match_text_seq("PAST", "LAST", "ROW"): 2726 text += " PAST LAST ROW" 2727 elif self._match_text_seq("TO", "NEXT", "ROW"): 2728 text += " 
TO NEXT ROW" 2729 elif self._match_text_seq("TO", "FIRST"): 2730 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2731 elif self._match_text_seq("TO", "LAST"): 2732 text += f" TO LAST {self._advance_any().text}" # type: ignore 2733 after = exp.var(text) 2734 else: 2735 after = None 2736 2737 if self._match_text_seq("PATTERN"): 2738 self._match_l_paren() 2739 2740 if not self._curr: 2741 self.raise_error("Expecting )", self._curr) 2742 2743 paren = 1 2744 start = self._curr 2745 2746 while self._curr and paren > 0: 2747 if self._curr.token_type == TokenType.L_PAREN: 2748 paren += 1 2749 if self._curr.token_type == TokenType.R_PAREN: 2750 paren -= 1 2751 2752 end = self._prev 2753 self._advance() 2754 2755 if paren > 0: 2756 self.raise_error("Expecting )", self._curr) 2757 2758 pattern = exp.var(self._find_sql(start, end)) 2759 else: 2760 pattern = None 2761 2762 define = ( 2763 self._parse_csv(self._parse_name_as_expression) 2764 if self._match_text_seq("DEFINE") 2765 else None 2766 ) 2767 2768 self._match_r_paren() 2769 2770 return self.expression( 2771 exp.MatchRecognize, 2772 partition_by=partition, 2773 order=order, 2774 measures=measures, 2775 rows=rows, 2776 after=after, 2777 pattern=pattern, 2778 define=define, 2779 alias=self._parse_table_alias(), 2780 ) 2781 2782 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2783 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2784 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2785 cross_apply = False 2786 2787 if cross_apply is not None: 2788 this = self._parse_select(table=True) 2789 view = None 2790 outer = None 2791 elif self._match(TokenType.LATERAL): 2792 this = self._parse_select(table=True) 2793 view = self._match(TokenType.VIEW) 2794 outer = self._match(TokenType.OUTER) 2795 else: 2796 return None 2797 2798 if not this: 2799 this = ( 2800 self._parse_unnest() 2801 or self._parse_function() 2802 or self._parse_id_var(any_token=False) 2803 ) 2804 2805 while self._match(TokenType.DOT): 2806 this = exp.Dot( 2807 this=this, 2808 expression=self._parse_function() or self._parse_id_var(any_token=False), 2809 ) 2810 2811 if view: 2812 table = self._parse_id_var(any_token=False) 2813 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2814 table_alias: t.Optional[exp.TableAlias] = self.expression( 2815 exp.TableAlias, this=table, columns=columns 2816 ) 2817 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2818 # We move the alias from the lateral's child node to the lateral itself 2819 table_alias = this.args["alias"].pop() 2820 else: 2821 table_alias = self._parse_table_alias() 2822 2823 return self.expression( 2824 exp.Lateral, 2825 this=this, 2826 view=view, 2827 outer=outer, 2828 alias=table_alias, 2829 cross_apply=cross_apply, 2830 ) 2831 2832 def _parse_join_parts( 2833 self, 2834 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2835 return ( 2836 self._match_set(self.JOIN_METHODS) and self._prev, 2837 self._match_set(self.JOIN_SIDES) and self._prev, 2838 self._match_set(self.JOIN_KINDS) and self._prev, 2839 ) 2840 2841 def _parse_join( 2842 self, skip_join_token: bool = False, parse_bracket: bool = False 2843 ) -> t.Optional[exp.Join]: 2844 if self._match(TokenType.COMMA): 2845 return self.expression(exp.Join, this=self._parse_table()) 2846 2847 index = self._index 2848 method, side, kind = self._parse_join_parts() 2849 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2850 join = 
self._match(TokenType.JOIN) 2851 2852 if not skip_join_token and not join: 2853 self._retreat(index) 2854 kind = None 2855 method = None 2856 side = None 2857 2858 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2859 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2860 2861 if not skip_join_token and not join and not outer_apply and not cross_apply: 2862 return None 2863 2864 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2865 2866 if method: 2867 kwargs["method"] = method.text 2868 if side: 2869 kwargs["side"] = side.text 2870 if kind: 2871 kwargs["kind"] = kind.text 2872 if hint: 2873 kwargs["hint"] = hint 2874 2875 if self._match(TokenType.ON): 2876 kwargs["on"] = self._parse_conjunction() 2877 elif self._match(TokenType.USING): 2878 kwargs["using"] = self._parse_wrapped_id_vars() 2879 elif not (kind and kind.token_type == TokenType.CROSS): 2880 index = self._index 2881 join = self._parse_join() 2882 2883 if join and self._match(TokenType.ON): 2884 kwargs["on"] = self._parse_conjunction() 2885 elif join and self._match(TokenType.USING): 2886 kwargs["using"] = self._parse_wrapped_id_vars() 2887 else: 2888 join = None 2889 self._retreat(index) 2890 2891 kwargs["this"].set("joins", [join] if join else None) 2892 2893 comments = [c for token in (method, side, kind) if token for c in token.comments] 2894 return self.expression(exp.Join, comments=comments, **kwargs) 2895 2896 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2897 this = self._parse_conjunction() 2898 2899 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2900 return this 2901 2902 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2903 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2904 2905 return this 2906 2907 def _parse_index_params(self) -> exp.IndexParameters: 2908 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2909 2910 if self._match(TokenType.L_PAREN, advance=False): 2911 columns = self._parse_wrapped_csv(self._parse_with_operator) 2912 else: 2913 columns = None 2914 2915 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 2916 partition_by = self._parse_partition_by() 2917 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 2918 tablespace = ( 2919 self._parse_var(any_token=True) 2920 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 2921 else None 2922 ) 2923 where = self._parse_where() 2924 2925 return self.expression( 2926 exp.IndexParameters, 2927 using=using, 2928 columns=columns, 2929 include=include, 2930 partition_by=partition_by, 2931 where=where, 2932 with_storage=with_storage, 2933 tablespace=tablespace, 2934 ) 2935 2936 def _parse_index( 2937 self, 2938 index: t.Optional[exp.Expression] = None, 2939 ) -> t.Optional[exp.Index]: 2940 if index: 2941 unique = None 2942 primary = None 2943 amp = None 2944 2945 self._match(TokenType.ON) 2946 self._match(TokenType.TABLE) # hive 2947 table = self._parse_table_parts(schema=True) 2948 else: 2949 unique = self._match(TokenType.UNIQUE) 2950 primary = self._match_text_seq("PRIMARY") 2951 amp = self._match_text_seq("AMP") 2952 2953 if not self._match(TokenType.INDEX): 2954 return None 2955 2956 index = self._parse_id_var() 2957 table = None 2958 2959 params = self._parse_index_params() 2960 2961 return self.expression( 2962 exp.Index, 2963 this=index, 2964 table=table, 2965 unique=unique, 2966 
primary=primary, 2967 amp=amp, 2968 params=params, 2969 ) 2970 2971 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2972 hints: t.List[exp.Expression] = [] 2973 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2974 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2975 hints.append( 2976 self.expression( 2977 exp.WithTableHint, 2978 expressions=self._parse_csv( 2979 lambda: self._parse_function() or self._parse_var(any_token=True) 2980 ), 2981 ) 2982 ) 2983 self._match_r_paren() 2984 else: 2985 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2986 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2987 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2988 2989 self._match_texts(("INDEX", "KEY")) 2990 if self._match(TokenType.FOR): 2991 hint.set("target", self._advance_any() and self._prev.text.upper()) 2992 2993 hint.set("expressions", self._parse_wrapped_id_vars()) 2994 hints.append(hint) 2995 2996 return hints or None 2997 2998 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2999 return ( 3000 (not schema and self._parse_function(optional_parens=False)) 3001 or self._parse_id_var(any_token=False) 3002 or self._parse_string_as_identifier() 3003 or self._parse_placeholder() 3004 ) 3005 3006 def _parse_table_parts( 3007 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3008 ) -> exp.Table: 3009 catalog = None 3010 db = None 3011 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3012 3013 while self._match(TokenType.DOT): 3014 if catalog: 3015 # This allows nesting the table in arbitrarily many dot expressions if needed 3016 table = self.expression( 3017 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3018 ) 3019 else: 3020 catalog = db 3021 db = table 3022 # "" used for tsql FROM a..b case 3023 table = self._parse_table_part(schema=schema) or "" 3024 3025 if ( 3026 wildcard 3027 and self._is_connected() 3028 and (isinstance(table, exp.Identifier) or not table) 3029 and self._match(TokenType.STAR) 3030 ): 3031 if isinstance(table, exp.Identifier): 3032 table.args["this"] += "*" 3033 else: 3034 table = exp.Identifier(this="*") 3035 3036 if is_db_reference: 3037 catalog = db 3038 db = table 3039 table = None 3040 3041 if not table and not is_db_reference: 3042 self.raise_error(f"Expected table name but got {self._curr}") 3043 if not db and is_db_reference: 3044 self.raise_error(f"Expected database name but got {self._curr}") 3045 3046 return self.expression( 3047 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 3048 ) 3049 3050 def _parse_table( 3051 self, 3052 schema: bool = False, 3053 joins: bool = False, 3054 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3055 parse_bracket: bool = False, 3056 is_db_reference: bool = False, 3057 ) -> t.Optional[exp.Expression]: 3058 lateral = self._parse_lateral() 3059 if lateral: 3060 return lateral 3061 3062 unnest = self._parse_unnest() 3063 if unnest: 3064 return unnest 3065 3066 values = self._parse_derived_table_values() 3067 if values: 3068 return values 3069 3070 subquery = self._parse_select(table=True) 3071 if subquery: 3072 if not subquery.args.get("pivots"): 3073 subquery.set("pivots", self._parse_pivots()) 3074 return subquery 3075 3076 bracket = parse_bracket and self._parse_bracket(None) 3077 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3078 3079 only = 
self._match(TokenType.ONLY) 3080 3081 this = t.cast( 3082 exp.Expression, 3083 bracket 3084 or self._parse_bracket( 3085 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3086 ), 3087 ) 3088 3089 if only: 3090 this.set("only", only) 3091 3092 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3093 self._match_text_seq("*") 3094 3095 if schema: 3096 return self._parse_schema(this=this) 3097 3098 version = self._parse_version() 3099 3100 if version: 3101 this.set("version", version) 3102 3103 if self.dialect.ALIAS_POST_TABLESAMPLE: 3104 table_sample = self._parse_table_sample() 3105 3106 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3107 if alias: 3108 this.set("alias", alias) 3109 3110 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3111 return self.expression( 3112 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3113 ) 3114 3115 this.set("hints", self._parse_table_hints()) 3116 3117 if not this.args.get("pivots"): 3118 this.set("pivots", self._parse_pivots()) 3119 3120 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3121 table_sample = self._parse_table_sample() 3122 3123 if table_sample: 3124 table_sample.set("this", this) 3125 this = table_sample 3126 3127 if joins: 3128 for join in iter(self._parse_join, None): 3129 this.append("joins", join) 3130 3131 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3132 this.set("ordinality", True) 3133 this.set("alias", self._parse_table_alias()) 3134 3135 return this 3136 3137 def _parse_version(self) -> t.Optional[exp.Version]: 3138 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3139 this = "TIMESTAMP" 3140 elif self._match(TokenType.VERSION_SNAPSHOT): 3141 this = "VERSION" 3142 else: 3143 return None 3144 3145 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3146 kind = self._prev.text.upper() 3147 start = self._parse_bitwise() 3148 self._match_texts(("TO", "AND")) 3149 end = self._parse_bitwise() 3150 expression: t.Optional[exp.Expression] = self.expression( 3151 exp.Tuple, expressions=[start, end] 3152 ) 3153 elif self._match_text_seq("CONTAINED", "IN"): 3154 kind = "CONTAINED IN" 3155 expression = self.expression( 3156 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3157 ) 3158 elif self._match(TokenType.ALL): 3159 kind = "ALL" 3160 expression = None 3161 else: 3162 self._match_text_seq("AS", "OF") 3163 kind = "AS OF" 3164 expression = self._parse_type() 3165 3166 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3167 3168 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3169 if not self._match(TokenType.UNNEST): 3170 return None 3171 3172 expressions = self._parse_wrapped_csv(self._parse_equality) 3173 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3174 3175 alias = self._parse_table_alias() if with_alias else None 3176 3177 if alias: 3178 if self.dialect.UNNEST_COLUMN_ONLY: 3179 if alias.args.get("columns"): 3180 self.raise_error("Unexpected extra column alias in unnest.") 3181 3182 alias.set("columns", [alias.this]) 3183 alias.set("this", None) 3184 3185 columns = alias.args.get("columns") or [] 3186 if offset and len(expressions) < len(columns): 3187 offset = columns.pop() 3188 3189 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3190 self._match(TokenType.ALIAS) 3191 offset = self._parse_id_var( 3192 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3193 ) or 
exp.to_identifier("offset") 3194 3195 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3196 3197 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3198 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3199 if not is_derived and not self._match_text_seq("VALUES"): 3200 return None 3201 3202 expressions = self._parse_csv(self._parse_value) 3203 alias = self._parse_table_alias() 3204 3205 if is_derived: 3206 self._match_r_paren() 3207 3208 return self.expression( 3209 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3210 ) 3211 3212 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3213 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3214 as_modifier and self._match_text_seq("USING", "SAMPLE") 3215 ): 3216 return None 3217 3218 bucket_numerator = None 3219 bucket_denominator = None 3220 bucket_field = None 3221 percent = None 3222 size = None 3223 seed = None 3224 3225 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3226 matched_l_paren = self._match(TokenType.L_PAREN) 3227 3228 if self.TABLESAMPLE_CSV: 3229 num = None 3230 expressions = self._parse_csv(self._parse_primary) 3231 else: 3232 expressions = None 3233 num = ( 3234 self._parse_factor() 3235 if self._match(TokenType.NUMBER, advance=False) 3236 else self._parse_primary() or self._parse_placeholder() 3237 ) 3238 3239 if self._match_text_seq("BUCKET"): 3240 bucket_numerator = self._parse_number() 3241 self._match_text_seq("OUT", "OF") 3242 bucket_denominator = bucket_denominator = self._parse_number() 3243 self._match(TokenType.ON) 3244 bucket_field = self._parse_field() 3245 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3246 percent = num 3247 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3248 size = num 3249 else: 3250 percent = num 3251 3252 if matched_l_paren: 3253 self._match_r_paren() 3254 3255 if self._match(TokenType.L_PAREN): 3256 method = self._parse_var(upper=True) 3257 seed = self._match(TokenType.COMMA) and self._parse_number() 3258 self._match_r_paren() 3259 elif self._match_texts(("SEED", "REPEATABLE")): 3260 seed = self._parse_wrapped(self._parse_number) 3261 3262 return self.expression( 3263 exp.TableSample, 3264 expressions=expressions, 3265 method=method, 3266 bucket_numerator=bucket_numerator, 3267 bucket_denominator=bucket_denominator, 3268 bucket_field=bucket_field, 3269 percent=percent, 3270 size=size, 3271 seed=seed, 3272 ) 3273 3274 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3275 return list(iter(self._parse_pivot, None)) or None 3276 3277 # https://duckdb.org/docs/sql/statements/pivot 3278 def _parse_simplified_pivot(self) -> exp.Pivot: 3279 def _parse_on() -> t.Optional[exp.Expression]: 3280 this = self._parse_bitwise() 3281 return self._parse_in(this) if self._match(TokenType.IN) else this 3282 3283 this = self._parse_table() 3284 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3285 using = self._match(TokenType.USING) and self._parse_csv( 3286 lambda: self._parse_alias(self._parse_function()) 3287 ) 3288 group = self._parse_group() 3289 return self.expression( 3290 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3291 ) 3292 3293 def _parse_pivot_in(self) -> exp.In: 3294 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3295 this = self._parse_conjunction() 3296 3297 self._match(TokenType.ALIAS) 3298 alias = self._parse_field() 
3299 if alias: 3300 return self.expression(exp.PivotAlias, this=this, alias=alias) 3301 3302 return this 3303 3304 value = self._parse_column() 3305 3306 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3307 self.raise_error("Expecting IN (") 3308 3309 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3310 3311 self._match_r_paren() 3312 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3313 3314 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3315 index = self._index 3316 include_nulls = None 3317 3318 if self._match(TokenType.PIVOT): 3319 unpivot = False 3320 elif self._match(TokenType.UNPIVOT): 3321 unpivot = True 3322 3323 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3324 if self._match_text_seq("INCLUDE", "NULLS"): 3325 include_nulls = True 3326 elif self._match_text_seq("EXCLUDE", "NULLS"): 3327 include_nulls = False 3328 else: 3329 return None 3330 3331 expressions = [] 3332 3333 if not self._match(TokenType.L_PAREN): 3334 self._retreat(index) 3335 return None 3336 3337 if unpivot: 3338 expressions = self._parse_csv(self._parse_column) 3339 else: 3340 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3341 3342 if not expressions: 3343 self.raise_error("Failed to parse PIVOT's aggregation list") 3344 3345 if not self._match(TokenType.FOR): 3346 self.raise_error("Expecting FOR") 3347 3348 field = self._parse_pivot_in() 3349 3350 self._match_r_paren() 3351 3352 pivot = self.expression( 3353 exp.Pivot, 3354 expressions=expressions, 3355 field=field, 3356 unpivot=unpivot, 3357 include_nulls=include_nulls, 3358 ) 3359 3360 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3361 pivot.set("alias", self._parse_table_alias()) 3362 3363 if not unpivot: 3364 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3365 3366 columns: t.List[exp.Expression] = [] 3367 for fld in pivot.args["field"].expressions: 3368 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3369 for name in names: 3370 if self.PREFIXED_PIVOT_COLUMNS: 3371 name = f"{name}_{field_name}" if name else field_name 3372 else: 3373 name = f"{field_name}_{name}" if name else field_name 3374 3375 columns.append(exp.to_identifier(name)) 3376 3377 pivot.set("columns", columns) 3378 3379 return pivot 3380 3381 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3382 return [agg.alias for agg in aggregations] 3383 3384 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3385 if not skip_where_token and not self._match(TokenType.PREWHERE): 3386 return None 3387 3388 return self.expression( 3389 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3390 ) 3391 3392 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3393 if not skip_where_token and not self._match(TokenType.WHERE): 3394 return None 3395 3396 return self.expression( 3397 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3398 ) 3399 3400 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3401 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3402 return None 3403 3404 elements = defaultdict(list) 3405 3406 if self._match(TokenType.ALL): 3407 return self.expression(exp.Group, all=True) 3408 3409 while True: 3410 expressions = self._parse_csv(self._parse_conjunction) 3411 if 
expressions: 3412 elements["expressions"].extend(expressions) 3413 3414 grouping_sets = self._parse_grouping_sets() 3415 if grouping_sets: 3416 elements["grouping_sets"].extend(grouping_sets) 3417 3418 rollup = None 3419 cube = None 3420 totals = None 3421 3422 index = self._index 3423 with_ = self._match(TokenType.WITH) 3424 if self._match(TokenType.ROLLUP): 3425 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3426 elements["rollup"].extend(ensure_list(rollup)) 3427 3428 if self._match(TokenType.CUBE): 3429 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3430 elements["cube"].extend(ensure_list(cube)) 3431 3432 if self._match_text_seq("TOTALS"): 3433 totals = True 3434 elements["totals"] = True # type: ignore 3435 3436 if not (grouping_sets or rollup or cube or totals): 3437 if with_: 3438 self._retreat(index) 3439 break 3440 3441 return self.expression(exp.Group, **elements) # type: ignore 3442 3443 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3444 if not self._match(TokenType.GROUPING_SETS): 3445 return None 3446 3447 return self._parse_wrapped_csv(self._parse_grouping_set) 3448 3449 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3450 if self._match(TokenType.L_PAREN): 3451 grouping_set = self._parse_csv(self._parse_column) 3452 self._match_r_paren() 3453 return self.expression(exp.Tuple, expressions=grouping_set) 3454 3455 return self._parse_column() 3456 3457 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3458 if not skip_having_token and not self._match(TokenType.HAVING): 3459 return None 3460 return self.expression(exp.Having, this=self._parse_conjunction()) 3461 3462 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3463 if not self._match(TokenType.QUALIFY): 3464 return None 3465 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3466 3467 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3468 if skip_start_token: 3469 start = None 3470 elif self._match(TokenType.START_WITH): 3471 start = self._parse_conjunction() 3472 else: 3473 return None 3474 3475 self._match(TokenType.CONNECT_BY) 3476 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3477 exp.Prior, this=self._parse_bitwise() 3478 ) 3479 connect = self._parse_conjunction() 3480 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3481 3482 if not start and self._match(TokenType.START_WITH): 3483 start = self._parse_conjunction() 3484 3485 return self.expression(exp.Connect, start=start, connect=connect) 3486 3487 def _parse_name_as_expression(self) -> exp.Alias: 3488 return self.expression( 3489 exp.Alias, 3490 alias=self._parse_id_var(any_token=True), 3491 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3492 ) 3493 3494 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3495 if self._match_text_seq("INTERPOLATE"): 3496 return self._parse_wrapped_csv(self._parse_name_as_expression) 3497 return None 3498 3499 def _parse_order( 3500 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3501 ) -> t.Optional[exp.Expression]: 3502 siblings = None 3503 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3504 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3505 return this 3506 3507 siblings = True 3508 3509 return self.expression( 3510 exp.Order, 3511 this=this, 3512 expressions=self._parse_csv(self._parse_ordered), 3513 interpolate=self._parse_interpolate(), 3514 siblings=siblings, 3515 ) 3516 
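    # Illustrative usage sketch (not part of the original source): _parse_order builds an
    # exp.Order whose expressions are exp.Ordered nodes, with the desc/nulls_first flags
    # resolved by _parse_ordered below. Assuming the default dialect, something like:
    #
    #   >>> import sqlglot
    #   >>> ast = sqlglot.parse_one("SELECT a FROM t ORDER BY a DESC NULLS LAST")
    #   >>> ordered = ast.args["order"].expressions[0]
    #   >>> ordered.args["desc"], ordered.args["nulls_first"]
    #   (True, False)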
3517 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3518 if not self._match(token): 3519 return None 3520 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3521 3522 def _parse_ordered( 3523 self, parse_method: t.Optional[t.Callable] = None 3524 ) -> t.Optional[exp.Ordered]: 3525 this = parse_method() if parse_method else self._parse_conjunction() 3526 if not this: 3527 return None 3528 3529 asc = self._match(TokenType.ASC) 3530 desc = self._match(TokenType.DESC) or (asc and False) 3531 3532 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3533 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3534 3535 nulls_first = is_nulls_first or False 3536 explicitly_null_ordered = is_nulls_first or is_nulls_last 3537 3538 if ( 3539 not explicitly_null_ordered 3540 and ( 3541 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3542 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3543 ) 3544 and self.dialect.NULL_ORDERING != "nulls_are_last" 3545 ): 3546 nulls_first = True 3547 3548 if self._match_text_seq("WITH", "FILL"): 3549 with_fill = self.expression( 3550 exp.WithFill, 3551 **{ # type: ignore 3552 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3553 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3554 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3555 }, 3556 ) 3557 else: 3558 with_fill = None 3559 3560 return self.expression( 3561 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3562 ) 3563 3564 def _parse_limit( 3565 self, 3566 this: t.Optional[exp.Expression] = None, 3567 top: bool = False, 3568 skip_limit_token: bool = False, 3569 ) -> t.Optional[exp.Expression]: 3570 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3571 comments = self._prev_comments 3572 if top: 3573 limit_paren = self._match(TokenType.L_PAREN) 3574 expression = self._parse_term() if limit_paren else self._parse_number() 3575 3576 if limit_paren: 3577 self._match_r_paren() 3578 else: 3579 expression = self._parse_term() 3580 3581 if self._match(TokenType.COMMA): 3582 offset = expression 3583 expression = self._parse_term() 3584 else: 3585 offset = None 3586 3587 limit_exp = self.expression( 3588 exp.Limit, 3589 this=this, 3590 expression=expression, 3591 offset=offset, 3592 comments=comments, 3593 expressions=self._parse_limit_by(), 3594 ) 3595 3596 return limit_exp 3597 3598 if self._match(TokenType.FETCH): 3599 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3600 direction = self._prev.text.upper() if direction else "FIRST" 3601 3602 count = self._parse_field(tokens=self.FETCH_TOKENS) 3603 percent = self._match(TokenType.PERCENT) 3604 3605 self._match_set((TokenType.ROW, TokenType.ROWS)) 3606 3607 only = self._match_text_seq("ONLY") 3608 with_ties = self._match_text_seq("WITH", "TIES") 3609 3610 if only and with_ties: 3611 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3612 3613 return self.expression( 3614 exp.Fetch, 3615 direction=direction, 3616 count=count, 3617 percent=percent, 3618 with_ties=with_ties, 3619 ) 3620 3621 return this 3622 3623 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3624 if not self._match(TokenType.OFFSET): 3625 return this 3626 3627 count = self._parse_term() 3628 self._match_set((TokenType.ROW, TokenType.ROWS)) 3629 3630 return self.expression( 3631 exp.Offset, this=this, expression=count, 
expressions=self._parse_limit_by() 3632 ) 3633 3634 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3635 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3636 3637 def _parse_locks(self) -> t.List[exp.Lock]: 3638 locks = [] 3639 while True: 3640 if self._match_text_seq("FOR", "UPDATE"): 3641 update = True 3642 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3643 "LOCK", "IN", "SHARE", "MODE" 3644 ): 3645 update = False 3646 else: 3647 break 3648 3649 expressions = None 3650 if self._match_text_seq("OF"): 3651 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3652 3653 wait: t.Optional[bool | exp.Expression] = None 3654 if self._match_text_seq("NOWAIT"): 3655 wait = True 3656 elif self._match_text_seq("WAIT"): 3657 wait = self._parse_primary() 3658 elif self._match_text_seq("SKIP", "LOCKED"): 3659 wait = False 3660 3661 locks.append( 3662 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3663 ) 3664 3665 return locks 3666 3667 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3668 while this and self._match_set(self.SET_OPERATIONS): 3669 token_type = self._prev.token_type 3670 3671 if token_type == TokenType.UNION: 3672 operation = exp.Union 3673 elif token_type == TokenType.EXCEPT: 3674 operation = exp.Except 3675 else: 3676 operation = exp.Intersect 3677 3678 comments = self._prev.comments 3679 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3680 by_name = self._match_text_seq("BY", "NAME") 3681 expression = self._parse_select(nested=True, parse_set_operation=False) 3682 3683 this = self.expression( 3684 operation, 3685 comments=comments, 3686 this=this, 3687 distinct=distinct, 3688 by_name=by_name, 3689 expression=expression, 3690 ) 3691 3692 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3693 expression = this.expression 3694 3695 if expression: 3696 for arg in self.UNION_MODIFIERS: 3697 expr = expression.args.get(arg) 3698 if expr: 3699 this.set(arg, expr.pop()) 3700 3701 return this 3702 3703 def _parse_expression(self) -> t.Optional[exp.Expression]: 3704 return self._parse_alias(self._parse_conjunction()) 3705 3706 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3707 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3708 3709 def _parse_equality(self) -> t.Optional[exp.Expression]: 3710 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3711 3712 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3713 return self._parse_tokens(self._parse_range, self.COMPARISON) 3714 3715 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3716 this = this or self._parse_bitwise() 3717 negate = self._match(TokenType.NOT) 3718 3719 if self._match_set(self.RANGE_PARSERS): 3720 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3721 if not expression: 3722 return this 3723 3724 this = expression 3725 elif self._match(TokenType.ISNULL): 3726 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3727 3728 # Postgres supports ISNULL and NOTNULL for conditions. 
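        # e.g. (illustrative, not from the original source) "x ISNULL" is normalized to
        # the same exp.Is(this=x, expression=exp.Null()) tree as the standard "x IS NULL",
        # so sqlglot.transpile("SELECT 1 WHERE x ISNULL", read="postgres") should yield
        # "SELECT 1 WHERE x IS NULL".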
3729 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3730 if self._match(TokenType.NOTNULL): 3731 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3732 this = self.expression(exp.Not, this=this) 3733 3734 if negate: 3735 this = self.expression(exp.Not, this=this) 3736 3737 if self._match(TokenType.IS): 3738 this = self._parse_is(this) 3739 3740 return this 3741 3742 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3743 index = self._index - 1 3744 negate = self._match(TokenType.NOT) 3745 3746 if self._match_text_seq("DISTINCT", "FROM"): 3747 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3748 return self.expression(klass, this=this, expression=self._parse_bitwise()) 3749 3750 expression = self._parse_null() or self._parse_boolean() 3751 if not expression: 3752 self._retreat(index) 3753 return None 3754 3755 this = self.expression(exp.Is, this=this, expression=expression) 3756 return self.expression(exp.Not, this=this) if negate else this 3757 3758 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3759 unnest = self._parse_unnest(with_alias=False) 3760 if unnest: 3761 this = self.expression(exp.In, this=this, unnest=unnest) 3762 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3763 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3764 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3765 3766 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 3767 this = self.expression(exp.In, this=this, query=expressions[0]) 3768 else: 3769 this = self.expression(exp.In, this=this, expressions=expressions) 3770 3771 if matched_l_paren: 3772 self._match_r_paren(this) 3773 elif not self._match(TokenType.R_BRACKET, expression=this): 3774 self.raise_error("Expecting ]") 3775 else: 3776 this = self.expression(exp.In, this=this, field=self._parse_field()) 3777 3778 return this 3779 3780 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3781 low = self._parse_bitwise() 3782 self._match(TokenType.AND) 3783 high = self._parse_bitwise() 3784 return self.expression(exp.Between, this=this, low=low, high=high) 3785 3786 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3787 if not self._match(TokenType.ESCAPE): 3788 return this 3789 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3790 3791 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3792 index = self._index 3793 3794 if not self._match(TokenType.INTERVAL) and match_interval: 3795 return None 3796 3797 if self._match(TokenType.STRING, advance=False): 3798 this = self._parse_primary() 3799 else: 3800 this = self._parse_term() 3801 3802 if not this or ( 3803 isinstance(this, exp.Column) 3804 and not this.table 3805 and not this.this.quoted 3806 and this.name.upper() == "IS" 3807 ): 3808 self._retreat(index) 3809 return None 3810 3811 unit = self._parse_function() or ( 3812 not self._match(TokenType.ALIAS, advance=False) 3813 and self._parse_var(any_token=True, upper=True) 3814 ) 3815 3816 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3817 # each INTERVAL expression into this canonical form so it's easy to transpile 3818 if this and this.is_number: 3819 this = exp.Literal.string(this.name) 3820 elif this and this.is_string: 3821 parts = this.name.split() 3822 3823 if len(parts) == 2: 3824 if unit: 3825 # This is not 
actually a unit, it's something else (e.g. a "window side") 3826 unit = None 3827 self._retreat(self._index - 1) 3828 3829 this = exp.Literal.string(parts[0]) 3830 unit = self.expression(exp.Var, this=parts[1].upper()) 3831 3832 return self.expression(exp.Interval, this=this, unit=unit) 3833 3834 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3835 this = self._parse_term() 3836 3837 while True: 3838 if self._match_set(self.BITWISE): 3839 this = self.expression( 3840 self.BITWISE[self._prev.token_type], 3841 this=this, 3842 expression=self._parse_term(), 3843 ) 3844 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3845 this = self.expression( 3846 exp.DPipe, 3847 this=this, 3848 expression=self._parse_term(), 3849 safe=not self.dialect.STRICT_STRING_CONCAT, 3850 ) 3851 elif self._match(TokenType.DQMARK): 3852 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3853 elif self._match_pair(TokenType.LT, TokenType.LT): 3854 this = self.expression( 3855 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3856 ) 3857 elif self._match_pair(TokenType.GT, TokenType.GT): 3858 this = self.expression( 3859 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3860 ) 3861 else: 3862 break 3863 3864 return this 3865 3866 def _parse_term(self) -> t.Optional[exp.Expression]: 3867 return self._parse_tokens(self._parse_factor, self.TERM) 3868 3869 def _parse_factor(self) -> t.Optional[exp.Expression]: 3870 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3871 this = parse_method() 3872 3873 while self._match_set(self.FACTOR): 3874 this = self.expression( 3875 self.FACTOR[self._prev.token_type], 3876 this=this, 3877 comments=self._prev_comments, 3878 expression=parse_method(), 3879 ) 3880 if isinstance(this, exp.Div): 3881 this.args["typed"] = self.dialect.TYPED_DIVISION 3882 this.args["safe"] = self.dialect.SAFE_DIVISION 3883 3884 return this 3885 3886 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3887 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3888 3889 def _parse_unary(self) -> t.Optional[exp.Expression]: 3890 if self._match_set(self.UNARY_PARSERS): 3891 return self.UNARY_PARSERS[self._prev.token_type](self) 3892 return self._parse_at_time_zone(self._parse_type()) 3893 3894 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3895 interval = parse_interval and self._parse_interval() 3896 if interval: 3897 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 3898 while True: 3899 index = self._index 3900 self._match(TokenType.PLUS) 3901 3902 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 3903 self._retreat(index) 3904 break 3905 3906 interval = self.expression( # type: ignore 3907 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 3908 ) 3909 3910 return interval 3911 3912 index = self._index 3913 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3914 this = self._parse_column() 3915 3916 if data_type: 3917 if isinstance(this, exp.Literal): 3918 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3919 if parser: 3920 return parser(self, this, data_type) 3921 return self.expression(exp.Cast, this=this, to=data_type) 3922 if not data_type.expressions: 3923 self._retreat(index) 3924 return self._parse_column() 3925 return self._parse_column_ops(data_type) 3926 3927 return this and self._parse_column_ops(this) 3928 3929 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3930 this = self._parse_type() 3931 if not this: 3932 return None 3933 3934 if isinstance(this, exp.Column) and not this.table: 3935 this = exp.var(this.name.upper()) 3936 3937 return self.expression( 3938 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3939 ) 3940 3941 def _parse_types( 3942 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3943 ) -> t.Optional[exp.Expression]: 3944 index = self._index 3945 3946 prefix = self._match_text_seq("SYSUDTLIB", ".") 3947 3948 if not self._match_set(self.TYPE_TOKENS): 3949 identifier = allow_identifiers and self._parse_id_var( 3950 any_token=False, tokens=(TokenType.VAR,) 3951 ) 3952 if identifier: 3953 tokens = self.dialect.tokenize(identifier.name) 3954 3955 if len(tokens) != 1: 3956 self.raise_error("Unexpected identifier", self._prev) 3957 3958 if tokens[0].token_type in self.TYPE_TOKENS: 3959 self._prev = tokens[0] 3960 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 3961 type_name = identifier.name 3962 3963 while self._match(TokenType.DOT): 3964 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3965 3966 return exp.DataType.build(type_name, udt=True) 3967 else: 3968 self._retreat(self._index - 1) 3969 return None 3970 else: 3971 return None 3972 3973 type_token = self._prev.token_type 3974 3975 if type_token == TokenType.PSEUDO_TYPE: 3976 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 3977 3978 if type_token == TokenType.OBJECT_IDENTIFIER: 3979 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 3980 3981 nested = type_token in self.NESTED_TYPE_TOKENS 3982 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3983 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 3984 expressions = None 3985 maybe_func = False 3986 3987 if self._match(TokenType.L_PAREN): 3988 if is_struct: 3989 expressions = self._parse_csv(self._parse_struct_types) 3990 elif nested: 3991 expressions = self._parse_csv( 3992 lambda: self._parse_types( 3993 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3994 ) 3995 ) 3996 elif type_token in self.ENUM_TYPE_TOKENS: 3997 expressions = self._parse_csv(self._parse_equality) 3998 elif is_aggregate: 3999 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4000 any_token=False, tokens=(TokenType.VAR,) 4001 ) 4002 if not func_or_ident or not self._match(TokenType.COMMA): 4003 return None 4004 expressions = 
self._parse_csv( 4005 lambda: self._parse_types( 4006 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4007 ) 4008 ) 4009 expressions.insert(0, func_or_ident) 4010 else: 4011 expressions = self._parse_csv(self._parse_type_size) 4012 4013 if not expressions or not self._match(TokenType.R_PAREN): 4014 self._retreat(index) 4015 return None 4016 4017 maybe_func = True 4018 4019 this: t.Optional[exp.Expression] = None 4020 values: t.Optional[t.List[exp.Expression]] = None 4021 4022 if nested and self._match(TokenType.LT): 4023 if is_struct: 4024 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4025 else: 4026 expressions = self._parse_csv( 4027 lambda: self._parse_types( 4028 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4029 ) 4030 ) 4031 4032 if not self._match(TokenType.GT): 4033 self.raise_error("Expecting >") 4034 4035 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4036 values = self._parse_csv(self._parse_conjunction) 4037 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4038 4039 if type_token in self.TIMESTAMPS: 4040 if self._match_text_seq("WITH", "TIME", "ZONE"): 4041 maybe_func = False 4042 tz_type = ( 4043 exp.DataType.Type.TIMETZ 4044 if type_token in self.TIMES 4045 else exp.DataType.Type.TIMESTAMPTZ 4046 ) 4047 this = exp.DataType(this=tz_type, expressions=expressions) 4048 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4049 maybe_func = False 4050 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4051 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4052 maybe_func = False 4053 elif type_token == TokenType.INTERVAL: 4054 unit = self._parse_var() 4055 4056 if self._match_text_seq("TO"): 4057 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 4058 else: 4059 span = None 4060 4061 if span or not unit: 4062 this = self.expression( 4063 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 4064 ) 4065 else: 4066 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4067 4068 if maybe_func and check_func: 4069 index2 = self._index 4070 peek = self._parse_string() 4071 4072 if not peek: 4073 self._retreat(index) 4074 return None 4075 4076 self._retreat(index2) 4077 4078 if not this: 4079 if self._match_text_seq("UNSIGNED"): 4080 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4081 if not unsigned_type_token: 4082 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4083 4084 type_token = unsigned_type_token or type_token 4085 4086 this = exp.DataType( 4087 this=exp.DataType.Type[type_token.value], 4088 expressions=expressions, 4089 nested=nested, 4090 values=values, 4091 prefix=prefix, 4092 ) 4093 4094 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 4095 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 4096 4097 return this 4098 4099 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4100 index = self._index 4101 this = self._parse_type(parse_interval=False) or self._parse_id_var() 4102 self._match(TokenType.COLON) 4103 column_def = self._parse_column_def(this) 4104 4105 if type_required and ( 4106 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 4107 ): 4108 self._retreat(index) 4109 return self._parse_types() 4110 4111 return column_def 4112 4113 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) 
-> t.Optional[exp.Expression]: 4114 if not self._match_text_seq("AT", "TIME", "ZONE"): 4115 return this 4116 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4117 4118 def _parse_column(self) -> t.Optional[exp.Expression]: 4119 this = self._parse_column_reference() 4120 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4121 4122 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4123 this = self._parse_field() 4124 if ( 4125 not this 4126 and self._match(TokenType.VALUES, advance=False) 4127 and self.VALUES_FOLLOWED_BY_PAREN 4128 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4129 ): 4130 this = self._parse_id_var() 4131 4132 return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this 4133 4134 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4135 this = self._parse_bracket(this) 4136 4137 while self._match_set(self.COLUMN_OPERATORS): 4138 op_token = self._prev.token_type 4139 op = self.COLUMN_OPERATORS.get(op_token) 4140 4141 if op_token == TokenType.DCOLON: 4142 field = self._parse_types() 4143 if not field: 4144 self.raise_error("Expected type") 4145 elif op and self._curr: 4146 field = self._parse_column_reference() 4147 else: 4148 field = self._parse_field(anonymous_func=True, any_token=True) 4149 4150 if isinstance(field, exp.Func) and this: 4151 # bigquery allows function calls like x.y.count(...) 4152 # SAFE.SUBSTR(...) 4153 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4154 this = exp.replace_tree( 4155 this, 4156 lambda n: ( 4157 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4158 if n.table 4159 else n.this 4160 ) 4161 if isinstance(n, exp.Column) 4162 else n, 4163 ) 4164 4165 if op: 4166 this = op(self, this, field) 4167 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4168 this = self.expression( 4169 exp.Column, 4170 this=field, 4171 table=this.this, 4172 db=this.args.get("table"), 4173 catalog=this.args.get("db"), 4174 ) 4175 else: 4176 this = self.expression(exp.Dot, this=this, expression=field) 4177 this = self._parse_bracket(this) 4178 return this 4179 4180 def _parse_primary(self) -> t.Optional[exp.Expression]: 4181 if self._match_set(self.PRIMARY_PARSERS): 4182 token_type = self._prev.token_type 4183 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4184 4185 if token_type == TokenType.STRING: 4186 expressions = [primary] 4187 while self._match(TokenType.STRING): 4188 expressions.append(exp.Literal.string(self._prev.text)) 4189 4190 if len(expressions) > 1: 4191 return self.expression(exp.Concat, expressions=expressions) 4192 4193 return primary 4194 4195 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4196 return exp.Literal.number(f"0.{self._prev.text}") 4197 4198 if self._match(TokenType.L_PAREN): 4199 comments = self._prev_comments 4200 query = self._parse_select() 4201 4202 if query: 4203 expressions = [query] 4204 else: 4205 expressions = self._parse_expressions() 4206 4207 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4208 4209 if isinstance(this, exp.UNWRAPPED_QUERIES): 4210 this = self._parse_set_operations( 4211 self._parse_subquery(this=this, parse_alias=False) 4212 ) 4213 elif isinstance(this, exp.Subquery): 4214 this = self._parse_subquery( 4215 this=self._parse_set_operations(this), parse_alias=False 4216 ) 4217 elif len(expressions) > 1: 4218 this = 
self.expression(exp.Tuple, expressions=expressions) 4219 else: 4220 this = self.expression(exp.Paren, this=this) 4221 4222 if this: 4223 this.add_comments(comments) 4224 4225 self._match_r_paren(expression=this) 4226 return this 4227 4228 return None 4229 4230 def _parse_field( 4231 self, 4232 any_token: bool = False, 4233 tokens: t.Optional[t.Collection[TokenType]] = None, 4234 anonymous_func: bool = False, 4235 ) -> t.Optional[exp.Expression]: 4236 return ( 4237 self._parse_primary() 4238 or self._parse_function(anonymous=anonymous_func) 4239 or self._parse_id_var(any_token=any_token, tokens=tokens) 4240 ) 4241 4242 def _parse_function( 4243 self, 4244 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4245 anonymous: bool = False, 4246 optional_parens: bool = True, 4247 ) -> t.Optional[exp.Expression]: 4248 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4249 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4250 fn_syntax = False 4251 if ( 4252 self._match(TokenType.L_BRACE, advance=False) 4253 and self._next 4254 and self._next.text.upper() == "FN" 4255 ): 4256 self._advance(2) 4257 fn_syntax = True 4258 4259 func = self._parse_function_call( 4260 functions=functions, anonymous=anonymous, optional_parens=optional_parens 4261 ) 4262 4263 if fn_syntax: 4264 self._match(TokenType.R_BRACE) 4265 4266 return func 4267 4268 def _parse_function_call( 4269 self, 4270 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4271 anonymous: bool = False, 4272 optional_parens: bool = True, 4273 ) -> t.Optional[exp.Expression]: 4274 if not self._curr: 4275 return None 4276 4277 comments = self._curr.comments 4278 token_type = self._curr.token_type 4279 this = self._curr.text 4280 upper = this.upper() 4281 4282 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4283 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4284 self._advance() 4285 return self._parse_window(parser(self)) 4286 4287 if not self._next or self._next.token_type != TokenType.L_PAREN: 4288 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4289 self._advance() 4290 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4291 4292 return None 4293 4294 if token_type not in self.FUNC_TOKENS: 4295 return None 4296 4297 self._advance(2) 4298 4299 parser = self.FUNCTION_PARSERS.get(upper) 4300 if parser and not anonymous: 4301 this = parser(self) 4302 else: 4303 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4304 4305 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4306 this = self.expression(subquery_predicate, this=self._parse_select()) 4307 self._match_r_paren() 4308 return this 4309 4310 if functions is None: 4311 functions = self.FUNCTIONS 4312 4313 function = functions.get(upper) 4314 4315 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4316 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4317 4318 if alias: 4319 args = self._kv_to_prop_eq(args) 4320 4321 if function and not anonymous: 4322 if "dialect" in function.__code__.co_varnames: 4323 func = function(args, dialect=self.dialect) 4324 else: 4325 func = function(args) 4326 4327 func = self.validate_expression(func, args) 4328 if not self.dialect.NORMALIZE_FUNCTIONS: 4329 func.meta["name"] = this 4330 4331 this = func 4332 else: 4333 if token_type == TokenType.IDENTIFIER: 4334 this = exp.Identifier(this=this, quoted=True) 4335 this = self.expression(exp.Anonymous, this=this, expressions=args) 4336 
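        # Illustrative note (not part of the original source): function names with no
        # registered builder fall through to exp.Anonymous above and round-trip verbatim,
        # e.g. sqlglot.parse_one("SELECT MY_UDF(1, 'a')").sql() should return the input
        # unchanged.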
4337 if isinstance(this, exp.Expression): 4338 this.add_comments(comments) 4339 4340 self._match_r_paren(this) 4341 return self._parse_window(this) 4342 4343 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4344 transformed = [] 4345 4346 for e in expressions: 4347 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4348 if isinstance(e, exp.Alias): 4349 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4350 4351 if not isinstance(e, exp.PropertyEQ): 4352 e = self.expression( 4353 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4354 ) 4355 4356 if isinstance(e.this, exp.Column): 4357 e.this.replace(e.this.this) 4358 4359 transformed.append(e) 4360 4361 return transformed 4362 4363 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4364 return self._parse_column_def(self._parse_id_var()) 4365 4366 def _parse_user_defined_function( 4367 self, kind: t.Optional[TokenType] = None 4368 ) -> t.Optional[exp.Expression]: 4369 this = self._parse_id_var() 4370 4371 while self._match(TokenType.DOT): 4372 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4373 4374 if not self._match(TokenType.L_PAREN): 4375 return this 4376 4377 expressions = self._parse_csv(self._parse_function_parameter) 4378 self._match_r_paren() 4379 return self.expression( 4380 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4381 ) 4382 4383 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4384 literal = self._parse_primary() 4385 if literal: 4386 return self.expression(exp.Introducer, this=token.text, expression=literal) 4387 4388 return self.expression(exp.Identifier, this=token.text) 4389 4390 def _parse_session_parameter(self) -> exp.SessionParameter: 4391 kind = None 4392 this = self._parse_id_var() or self._parse_primary() 4393 4394 if this and self._match(TokenType.DOT): 4395 kind = this.name 4396 this = self._parse_var() or self._parse_primary() 4397 4398 return self.expression(exp.SessionParameter, this=this, kind=kind) 4399 4400 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4401 index = self._index 4402 4403 if self._match(TokenType.L_PAREN): 4404 expressions = t.cast( 4405 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4406 ) 4407 4408 if not self._match(TokenType.R_PAREN): 4409 self._retreat(index) 4410 else: 4411 expressions = [self._parse_id_var()] 4412 4413 if self._match_set(self.LAMBDAS): 4414 return self.LAMBDAS[self._prev.token_type](self, expressions) 4415 4416 self._retreat(index) 4417 4418 this: t.Optional[exp.Expression] 4419 4420 if self._match(TokenType.DISTINCT): 4421 this = self.expression( 4422 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4423 ) 4424 else: 4425 this = self._parse_select_or_expression(alias=alias) 4426 4427 return self._parse_limit( 4428 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4429 ) 4430 4431 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4432 index = self._index 4433 4434 if not self.errors: 4435 try: 4436 if self._parse_select(nested=True): 4437 return this 4438 except ParseError: 4439 pass 4440 finally: 4441 self.errors.clear() 4442 self._retreat(index) 4443 4444 if not self._match(TokenType.L_PAREN): 4445 return this 4446 4447 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4448 4449 
self._match_r_paren() 4450 return self.expression(exp.Schema, this=this, expressions=args) 4451 4452 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4453 return self._parse_column_def(self._parse_field(any_token=True)) 4454 4455 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4456 # column defs are not really columns, they're identifiers 4457 if isinstance(this, exp.Column): 4458 this = this.this 4459 4460 kind = self._parse_types(schema=True) 4461 4462 if self._match_text_seq("FOR", "ORDINALITY"): 4463 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4464 4465 constraints: t.List[exp.Expression] = [] 4466 4467 if not kind and self._match(TokenType.ALIAS): 4468 constraints.append( 4469 self.expression( 4470 exp.ComputedColumnConstraint, 4471 this=self._parse_conjunction(), 4472 persisted=self._match_text_seq("PERSISTED"), 4473 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4474 ) 4475 ) 4476 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4477 self._match(TokenType.ALIAS) 4478 constraints.append( 4479 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4480 ) 4481 4482 while True: 4483 constraint = self._parse_column_constraint() 4484 if not constraint: 4485 break 4486 constraints.append(constraint) 4487 4488 if not kind and not constraints: 4489 return this 4490 4491 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4492 4493 def _parse_auto_increment( 4494 self, 4495 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4496 start = None 4497 increment = None 4498 4499 if self._match(TokenType.L_PAREN, advance=False): 4500 args = self._parse_wrapped_csv(self._parse_bitwise) 4501 start = seq_get(args, 0) 4502 increment = seq_get(args, 1) 4503 elif self._match_text_seq("START"): 4504 start = self._parse_bitwise() 4505 self._match_text_seq("INCREMENT") 4506 increment = self._parse_bitwise() 4507 4508 if start and increment: 4509 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4510 4511 return exp.AutoIncrementColumnConstraint() 4512 4513 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4514 if not self._match_text_seq("REFRESH"): 4515 self._retreat(self._index - 1) 4516 return None 4517 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4518 4519 def _parse_compress(self) -> exp.CompressColumnConstraint: 4520 if self._match(TokenType.L_PAREN, advance=False): 4521 return self.expression( 4522 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4523 ) 4524 4525 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4526 4527 def _parse_generated_as_identity( 4528 self, 4529 ) -> ( 4530 exp.GeneratedAsIdentityColumnConstraint 4531 | exp.ComputedColumnConstraint 4532 | exp.GeneratedAsRowColumnConstraint 4533 ): 4534 if self._match_text_seq("BY", "DEFAULT"): 4535 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4536 this = self.expression( 4537 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4538 ) 4539 else: 4540 self._match_text_seq("ALWAYS") 4541 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4542 4543 self._match(TokenType.ALIAS) 4544 4545 if self._match_text_seq("ROW"): 4546 start = self._match_text_seq("START") 4547 if not start: 4548 self._match(TokenType.END) 4549 hidden = 
self._match_text_seq("HIDDEN") 4550 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4551 4552 identity = self._match_text_seq("IDENTITY") 4553 4554 if self._match(TokenType.L_PAREN): 4555 if self._match(TokenType.START_WITH): 4556 this.set("start", self._parse_bitwise()) 4557 if self._match_text_seq("INCREMENT", "BY"): 4558 this.set("increment", self._parse_bitwise()) 4559 if self._match_text_seq("MINVALUE"): 4560 this.set("minvalue", self._parse_bitwise()) 4561 if self._match_text_seq("MAXVALUE"): 4562 this.set("maxvalue", self._parse_bitwise()) 4563 4564 if self._match_text_seq("CYCLE"): 4565 this.set("cycle", True) 4566 elif self._match_text_seq("NO", "CYCLE"): 4567 this.set("cycle", False) 4568 4569 if not identity: 4570 this.set("expression", self._parse_bitwise()) 4571 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4572 args = self._parse_csv(self._parse_bitwise) 4573 this.set("start", seq_get(args, 0)) 4574 this.set("increment", seq_get(args, 1)) 4575 4576 self._match_r_paren() 4577 4578 return this 4579 4580 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4581 self._match_text_seq("LENGTH") 4582 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4583 4584 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4585 if self._match_text_seq("NULL"): 4586 return self.expression(exp.NotNullColumnConstraint) 4587 if self._match_text_seq("CASESPECIFIC"): 4588 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4589 if self._match_text_seq("FOR", "REPLICATION"): 4590 return self.expression(exp.NotForReplicationColumnConstraint) 4591 return None 4592 4593 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4594 if self._match(TokenType.CONSTRAINT): 4595 this = self._parse_id_var() 4596 else: 4597 this = None 4598 4599 if self._match_texts(self.CONSTRAINT_PARSERS): 4600 return self.expression( 4601 exp.ColumnConstraint, 4602 this=this, 4603 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4604 ) 4605 4606 return this 4607 4608 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4609 if not self._match(TokenType.CONSTRAINT): 4610 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4611 4612 return self.expression( 4613 exp.Constraint, 4614 this=self._parse_id_var(), 4615 expressions=self._parse_unnamed_constraints(), 4616 ) 4617 4618 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 4619 constraints = [] 4620 while True: 4621 constraint = self._parse_unnamed_constraint() or self._parse_function() 4622 if not constraint: 4623 break 4624 constraints.append(constraint) 4625 4626 return constraints 4627 4628 def _parse_unnamed_constraint( 4629 self, constraints: t.Optional[t.Collection[str]] = None 4630 ) -> t.Optional[exp.Expression]: 4631 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4632 constraints or self.CONSTRAINT_PARSERS 4633 ): 4634 return None 4635 4636 constraint = self._prev.text.upper() 4637 if constraint not in self.CONSTRAINT_PARSERS: 4638 self.raise_error(f"No parser found for schema constraint {constraint}.") 4639 4640 return self.CONSTRAINT_PARSERS[constraint](self) 4641 4642 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4643 self._match_text_seq("KEY") 4644 return self.expression( 4645 exp.UniqueColumnConstraint, 4646 this=self._parse_schema(self._parse_id_var(any_token=False)), 4647 
index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4648 on_conflict=self._parse_on_conflict(), 4649 ) 4650 4651 def _parse_key_constraint_options(self) -> t.List[str]: 4652 options = [] 4653 while True: 4654 if not self._curr: 4655 break 4656 4657 if self._match(TokenType.ON): 4658 action = None 4659 on = self._advance_any() and self._prev.text 4660 4661 if self._match_text_seq("NO", "ACTION"): 4662 action = "NO ACTION" 4663 elif self._match_text_seq("CASCADE"): 4664 action = "CASCADE" 4665 elif self._match_text_seq("RESTRICT"): 4666 action = "RESTRICT" 4667 elif self._match_pair(TokenType.SET, TokenType.NULL): 4668 action = "SET NULL" 4669 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4670 action = "SET DEFAULT" 4671 else: 4672 self.raise_error("Invalid key constraint") 4673 4674 options.append(f"ON {on} {action}") 4675 elif self._match_text_seq("NOT", "ENFORCED"): 4676 options.append("NOT ENFORCED") 4677 elif self._match_text_seq("DEFERRABLE"): 4678 options.append("DEFERRABLE") 4679 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4680 options.append("INITIALLY DEFERRED") 4681 elif self._match_text_seq("NORELY"): 4682 options.append("NORELY") 4683 elif self._match_text_seq("MATCH", "FULL"): 4684 options.append("MATCH FULL") 4685 else: 4686 break 4687 4688 return options 4689 4690 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4691 if match and not self._match(TokenType.REFERENCES): 4692 return None 4693 4694 expressions = None 4695 this = self._parse_table(schema=True) 4696 options = self._parse_key_constraint_options() 4697 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4698 4699 def _parse_foreign_key(self) -> exp.ForeignKey: 4700 expressions = self._parse_wrapped_id_vars() 4701 reference = self._parse_references() 4702 options = {} 4703 4704 while self._match(TokenType.ON): 4705 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4706 self.raise_error("Expected DELETE or UPDATE") 4707 4708 kind = self._prev.text.lower() 4709 4710 if self._match_text_seq("NO", "ACTION"): 4711 action = "NO ACTION" 4712 elif self._match(TokenType.SET): 4713 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4714 action = "SET " + self._prev.text.upper() 4715 else: 4716 self._advance() 4717 action = self._prev.text.upper() 4718 4719 options[kind] = action 4720 4721 return self.expression( 4722 exp.ForeignKey, 4723 expressions=expressions, 4724 reference=reference, 4725 **options, # type: ignore 4726 ) 4727 4728 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4729 return self._parse_field() 4730 4731 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4732 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4733 self._retreat(self._index - 1) 4734 return None 4735 4736 id_vars = self._parse_wrapped_id_vars() 4737 return self.expression( 4738 exp.PeriodForSystemTimeConstraint, 4739 this=seq_get(id_vars, 0), 4740 expression=seq_get(id_vars, 1), 4741 ) 4742 4743 def _parse_primary_key( 4744 self, wrapped_optional: bool = False, in_props: bool = False 4745 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4746 desc = ( 4747 self._match_set((TokenType.ASC, TokenType.DESC)) 4748 and self._prev.token_type == TokenType.DESC 4749 ) 4750 4751 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4752 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4753 4754 expressions = 
self._parse_wrapped_csv( 4755 self._parse_primary_key_part, optional=wrapped_optional 4756 ) 4757 options = self._parse_key_constraint_options() 4758 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4759 4760 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4761 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4762 4763 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4764 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4765 return this 4766 4767 bracket_kind = self._prev.token_type 4768 expressions = self._parse_csv( 4769 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4770 ) 4771 4772 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 4773 self.raise_error("Expected ]") 4774 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 4775 self.raise_error("Expected }") 4776 4777 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4778 if bracket_kind == TokenType.L_BRACE: 4779 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 4780 elif not this or this.name.upper() == "ARRAY": 4781 this = self.expression(exp.Array, expressions=expressions) 4782 else: 4783 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4784 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4785 4786 self._add_comments(this) 4787 return self._parse_bracket(this) 4788 4789 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4790 if self._match(TokenType.COLON): 4791 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4792 return this 4793 4794 def _parse_case(self) -> t.Optional[exp.Expression]: 4795 ifs = [] 4796 default = None 4797 4798 comments = self._prev_comments 4799 expression = self._parse_conjunction() 4800 4801 while self._match(TokenType.WHEN): 4802 this = self._parse_conjunction() 4803 self._match(TokenType.THEN) 4804 then = self._parse_conjunction() 4805 ifs.append(self.expression(exp.If, this=this, true=then)) 4806 4807 if self._match(TokenType.ELSE): 4808 default = self._parse_conjunction() 4809 4810 if not self._match(TokenType.END): 4811 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4812 default = exp.column("interval") 4813 else: 4814 self.raise_error("Expected END after CASE", self._prev) 4815 4816 return self.expression( 4817 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 4818 ) 4819 4820 def _parse_if(self) -> t.Optional[exp.Expression]: 4821 if self._match(TokenType.L_PAREN): 4822 args = self._parse_csv(self._parse_conjunction) 4823 this = self.validate_expression(exp.If.from_arg_list(args), args) 4824 self._match_r_paren() 4825 else: 4826 index = self._index - 1 4827 4828 if self.NO_PAREN_IF_COMMANDS and index == 0: 4829 return self._parse_as_command(self._prev) 4830 4831 condition = self._parse_conjunction() 4832 4833 if not condition: 4834 self._retreat(index) 4835 return None 4836 4837 self._match(TokenType.THEN) 4838 true = self._parse_conjunction() 4839 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4840 self._match(TokenType.END) 4841 this = self.expression(exp.If, this=condition, true=true, false=false) 4842 4843 return this 4844 4845 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 
4846 if not self._match_text_seq("VALUE", "FOR"): 4847 self._retreat(self._index - 1) 4848 return None 4849 4850 return self.expression( 4851 exp.NextValueFor, 4852 this=self._parse_column(), 4853 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4854 ) 4855 4856 def _parse_extract(self) -> exp.Extract: 4857 this = self._parse_function() or self._parse_var() or self._parse_type() 4858 4859 if self._match(TokenType.FROM): 4860 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4861 4862 if not self._match(TokenType.COMMA): 4863 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4864 4865 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4866 4867 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4868 this = self._parse_conjunction() 4869 4870 if not self._match(TokenType.ALIAS): 4871 if self._match(TokenType.COMMA): 4872 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4873 4874 self.raise_error("Expected AS after CAST") 4875 4876 fmt = None 4877 to = self._parse_types() 4878 4879 if self._match(TokenType.FORMAT): 4880 fmt_string = self._parse_string() 4881 fmt = self._parse_at_time_zone(fmt_string) 4882 4883 if not to: 4884 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4885 if to.this in exp.DataType.TEMPORAL_TYPES: 4886 this = self.expression( 4887 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4888 this=this, 4889 format=exp.Literal.string( 4890 format_time( 4891 fmt_string.this if fmt_string else "", 4892 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 4893 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 4894 ) 4895 ), 4896 ) 4897 4898 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4899 this.set("zone", fmt.args["zone"]) 4900 return this 4901 elif not to: 4902 self.raise_error("Expected TYPE after CAST") 4903 elif isinstance(to, exp.Identifier): 4904 to = exp.DataType.build(to.name, udt=True) 4905 elif to.this == exp.DataType.Type.CHAR: 4906 if self._match(TokenType.CHARACTER_SET): 4907 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4908 4909 return self.expression( 4910 exp.Cast if strict else exp.TryCast, 4911 this=this, 4912 to=to, 4913 format=fmt, 4914 safe=safe, 4915 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 4916 ) 4917 4918 def _parse_string_agg(self) -> exp.Expression: 4919 if self._match(TokenType.DISTINCT): 4920 args: t.List[t.Optional[exp.Expression]] = [ 4921 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4922 ] 4923 if self._match(TokenType.COMMA): 4924 args.extend(self._parse_csv(self._parse_conjunction)) 4925 else: 4926 args = self._parse_csv(self._parse_conjunction) # type: ignore 4927 4928 index = self._index 4929 if not self._match(TokenType.R_PAREN) and args: 4930 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4931 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4932 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4933 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4934 4935 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 
4936 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4937 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4938 if not self._match_text_seq("WITHIN", "GROUP"): 4939 self._retreat(index) 4940 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4941 4942 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4943 order = self._parse_order(this=seq_get(args, 0)) 4944 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4945 4946 def _parse_convert( 4947 self, strict: bool, safe: t.Optional[bool] = None 4948 ) -> t.Optional[exp.Expression]: 4949 this = self._parse_bitwise() 4950 4951 if self._match(TokenType.USING): 4952 to: t.Optional[exp.Expression] = self.expression( 4953 exp.CharacterSet, this=self._parse_var() 4954 ) 4955 elif self._match(TokenType.COMMA): 4956 to = self._parse_types() 4957 else: 4958 to = None 4959 4960 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4961 4962 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4963 """ 4964 There are generally two variants of the DECODE function: 4965 4966 - DECODE(bin, charset) 4967 - DECODE(expression, search, result [, search, result] ... [, default]) 4968 4969 The second variant will always be parsed into a CASE expression. Note that NULL 4970 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4971 instead of relying on pattern matching. 4972 """ 4973 args = self._parse_csv(self._parse_conjunction) 4974 4975 if len(args) < 3: 4976 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4977 4978 expression, *expressions = args 4979 if not expression: 4980 return None 4981 4982 ifs = [] 4983 for search, result in zip(expressions[::2], expressions[1::2]): 4984 if not search or not result: 4985 return None 4986 4987 if isinstance(search, exp.Literal): 4988 ifs.append( 4989 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4990 ) 4991 elif isinstance(search, exp.Null): 4992 ifs.append( 4993 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4994 ) 4995 else: 4996 cond = exp.or_( 4997 exp.EQ(this=expression.copy(), expression=search), 4998 exp.and_( 4999 exp.Is(this=expression.copy(), expression=exp.Null()), 5000 exp.Is(this=search.copy(), expression=exp.Null()), 5001 copy=False, 5002 ), 5003 copy=False, 5004 ) 5005 ifs.append(exp.If(this=cond, true=result)) 5006 5007 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5008 5009 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5010 self._match_text_seq("KEY") 5011 key = self._parse_column() 5012 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5013 self._match_text_seq("VALUE") 5014 value = self._parse_bitwise() 5015 5016 if not key and not value: 5017 return None 5018 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5019 5020 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5021 if not this or not self._match_text_seq("FORMAT", "JSON"): 5022 return this 5023 5024 return self.expression(exp.FormatJson, this=this) 5025 5026 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5027 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 5028 for value in values: 5029 if self._match_text_seq(value, "ON", on): 5030 return f"{value} ON {on}" 5031 5032 return None 5033 5034 @t.overload 5035 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5036 5037 @t.overload 5038 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 5039 5040 def _parse_json_object(self, agg=False): 5041 star = self._parse_star() 5042 expressions = ( 5043 [star] 5044 if star 5045 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5046 ) 5047 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5048 5049 unique_keys = None 5050 if self._match_text_seq("WITH", "UNIQUE"): 5051 unique_keys = True 5052 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5053 unique_keys = False 5054 5055 self._match_text_seq("KEYS") 5056 5057 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5058 self._parse_type() 5059 ) 5060 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5061 5062 return self.expression( 5063 exp.JSONObjectAgg if agg else exp.JSONObject, 5064 expressions=expressions, 5065 null_handling=null_handling, 5066 unique_keys=unique_keys, 5067 return_type=return_type, 5068 encoding=encoding, 5069 ) 5070 5071 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5072 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5073 if not self._match_text_seq("NESTED"): 5074 this = self._parse_id_var() 5075 kind = self._parse_types(allow_identifiers=False) 5076 nested = None 5077 else: 5078 this = None 5079 kind = None 5080 nested = True 5081 5082 path = self._match_text_seq("PATH") and self._parse_string() 5083 nested_schema = nested and self._parse_json_schema() 5084 5085 return self.expression( 5086 exp.JSONColumnDef, 5087 this=this, 5088 kind=kind, 5089 path=path, 5090 nested_schema=nested_schema, 5091 ) 5092 5093 def _parse_json_schema(self) -> exp.JSONSchema: 5094 self._match_text_seq("COLUMNS") 5095 return self.expression( 5096 exp.JSONSchema, 5097 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5098 ) 5099 5100 def _parse_json_table(self) -> exp.JSONTable: 5101 this = self._parse_format_json(self._parse_bitwise()) 5102 path = self._match(TokenType.COMMA) and self._parse_string() 5103 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5104 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5105 schema = self._parse_json_schema() 5106 5107 return exp.JSONTable( 5108 this=this, 5109 schema=schema, 5110 path=path, 5111 error_handling=error_handling, 5112 empty_handling=empty_handling, 5113 ) 5114 5115 def _parse_match_against(self) -> exp.MatchAgainst: 5116 expressions = self._parse_csv(self._parse_column) 5117 5118 self._match_text_seq(")", "AGAINST", "(") 5119 5120 this = self._parse_string() 5121 5122 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5123 modifier = "IN NATURAL LANGUAGE MODE" 5124 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5125 modifier = f"{modifier} WITH QUERY EXPANSION" 5126 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5127 modifier = "IN BOOLEAN MODE" 5128 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5129 modifier = "WITH QUERY EXPANSION" 5130 else: 5131 modifier = None 5132 5133 return self.expression( 5134 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5135 ) 5136 5137 # 
https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5138 def _parse_open_json(self) -> exp.OpenJSON: 5139 this = self._parse_bitwise() 5140 path = self._match(TokenType.COMMA) and self._parse_string() 5141 5142 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5143 this = self._parse_field(any_token=True) 5144 kind = self._parse_types() 5145 path = self._parse_string() 5146 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5147 5148 return self.expression( 5149 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5150 ) 5151 5152 expressions = None 5153 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5154 self._match_l_paren() 5155 expressions = self._parse_csv(_parse_open_json_column_def) 5156 5157 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5158 5159 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5160 args = self._parse_csv(self._parse_bitwise) 5161 5162 if self._match(TokenType.IN): 5163 return self.expression( 5164 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5165 ) 5166 5167 if haystack_first: 5168 haystack = seq_get(args, 0) 5169 needle = seq_get(args, 1) 5170 else: 5171 needle = seq_get(args, 0) 5172 haystack = seq_get(args, 1) 5173 5174 return self.expression( 5175 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5176 ) 5177 5178 def _parse_predict(self) -> exp.Predict: 5179 self._match_text_seq("MODEL") 5180 this = self._parse_table() 5181 5182 self._match(TokenType.COMMA) 5183 self._match_text_seq("TABLE") 5184 5185 return self.expression( 5186 exp.Predict, 5187 this=this, 5188 expression=self._parse_table(), 5189 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5190 ) 5191 5192 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5193 args = self._parse_csv(self._parse_table) 5194 return exp.JoinHint(this=func_name.upper(), expressions=args) 5195 5196 def _parse_substring(self) -> exp.Substring: 5197 # Postgres supports the form: substring(string [from int] [for int]) 5198 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5199 5200 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5201 5202 if self._match(TokenType.FROM): 5203 args.append(self._parse_bitwise()) 5204 if self._match(TokenType.FOR): 5205 args.append(self._parse_bitwise()) 5206 5207 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5208 5209 def _parse_trim(self) -> exp.Trim: 5210 # https://www.w3resource.com/sql/character-functions/trim.php 5211 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5212 5213 position = None 5214 collation = None 5215 expression = None 5216 5217 if self._match_texts(self.TRIM_TYPES): 5218 position = self._prev.text.upper() 5219 5220 this = self._parse_bitwise() 5221 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5222 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5223 expression = self._parse_bitwise() 5224 5225 if invert_order: 5226 this, expression = expression, this 5227 5228 if self._match(TokenType.COLLATE): 5229 collation = self._parse_bitwise() 5230 5231 return self.expression( 5232 exp.Trim, this=this, position=position, expression=expression, collation=collation 5233 ) 5234 5235 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5236 return self._match(TokenType.WINDOW) and 
self._parse_csv(self._parse_named_window) 5237 5238 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5239 return self._parse_window(self._parse_id_var(), alias=True) 5240 5241 def _parse_respect_or_ignore_nulls( 5242 self, this: t.Optional[exp.Expression] 5243 ) -> t.Optional[exp.Expression]: 5244 if self._match_text_seq("IGNORE", "NULLS"): 5245 return self.expression(exp.IgnoreNulls, this=this) 5246 if self._match_text_seq("RESPECT", "NULLS"): 5247 return self.expression(exp.RespectNulls, this=this) 5248 return this 5249 5250 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5251 if self._match(TokenType.HAVING): 5252 self._match_texts(("MAX", "MIN")) 5253 max = self._prev.text.upper() != "MIN" 5254 return self.expression( 5255 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5256 ) 5257 5258 return this 5259 5260 def _parse_window( 5261 self, this: t.Optional[exp.Expression], alias: bool = False 5262 ) -> t.Optional[exp.Expression]: 5263 func = this 5264 comments = func.comments if isinstance(func, exp.Expression) else None 5265 5266 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5267 self._match(TokenType.WHERE) 5268 this = self.expression( 5269 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5270 ) 5271 self._match_r_paren() 5272 5273 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5274 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5275 if self._match_text_seq("WITHIN", "GROUP"): 5276 order = self._parse_wrapped(self._parse_order) 5277 this = self.expression(exp.WithinGroup, this=this, expression=order) 5278 5279 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5280 # Some dialects choose to implement and some do not. 5281 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5282 5283 # There is some code above in _parse_lambda that handles 5284 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5285 5286 # The below changes handle 5287 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5288 5289 # Oracle allows both formats 5290 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5291 # and Snowflake chose to do the same for familiarity 5292 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5293 if isinstance(this, exp.AggFunc): 5294 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5295 5296 if ignore_respect and ignore_respect is not this: 5297 ignore_respect.replace(ignore_respect.this) 5298 this = self.expression(ignore_respect.__class__, this=this) 5299 5300 this = self._parse_respect_or_ignore_nulls(this) 5301 5302 # bigquery select from window x AS (partition by ...) 
5303 if alias: 5304 over = None 5305 self._match(TokenType.ALIAS) 5306 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5307 return this 5308 else: 5309 over = self._prev.text.upper() 5310 5311 if comments: 5312 func.comments = None # type: ignore 5313 5314 if not self._match(TokenType.L_PAREN): 5315 return self.expression( 5316 exp.Window, 5317 comments=comments, 5318 this=this, 5319 alias=self._parse_id_var(False), 5320 over=over, 5321 ) 5322 5323 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5324 5325 first = self._match(TokenType.FIRST) 5326 if self._match_text_seq("LAST"): 5327 first = False 5328 5329 partition, order = self._parse_partition_and_order() 5330 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5331 5332 if kind: 5333 self._match(TokenType.BETWEEN) 5334 start = self._parse_window_spec() 5335 self._match(TokenType.AND) 5336 end = self._parse_window_spec() 5337 5338 spec = self.expression( 5339 exp.WindowSpec, 5340 kind=kind, 5341 start=start["value"], 5342 start_side=start["side"], 5343 end=end["value"], 5344 end_side=end["side"], 5345 ) 5346 else: 5347 spec = None 5348 5349 self._match_r_paren() 5350 5351 window = self.expression( 5352 exp.Window, 5353 comments=comments, 5354 this=this, 5355 partition_by=partition, 5356 order=order, 5357 spec=spec, 5358 alias=window_alias, 5359 over=over, 5360 first=first, 5361 ) 5362 5363 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5364 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5365 return self._parse_window(window, alias=alias) 5366 5367 return window 5368 5369 def _parse_partition_and_order( 5370 self, 5371 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5372 return self._parse_partition_by(), self._parse_order() 5373 5374 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5375 self._match(TokenType.BETWEEN) 5376 5377 return { 5378 "value": ( 5379 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5380 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5381 or self._parse_bitwise() 5382 ), 5383 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5384 } 5385 5386 def _parse_alias( 5387 self, this: t.Optional[exp.Expression], explicit: bool = False 5388 ) -> t.Optional[exp.Expression]: 5389 any_token = self._match(TokenType.ALIAS) 5390 comments = self._prev_comments 5391 5392 if explicit and not any_token: 5393 return this 5394 5395 if self._match(TokenType.L_PAREN): 5396 aliases = self.expression( 5397 exp.Aliases, 5398 comments=comments, 5399 this=this, 5400 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5401 ) 5402 self._match_r_paren(aliases) 5403 return aliases 5404 5405 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5406 self.STRING_ALIASES and self._parse_string_as_identifier() 5407 ) 5408 5409 if alias: 5410 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5411 column = this.this 5412 5413 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5414 if not this.comments and column and column.comments: 5415 this.comments = column.comments 5416 column.comments = None 5417 5418 return this 5419 5420 def _parse_id_var( 5421 self, 5422 any_token: bool = True, 5423 tokens: t.Optional[t.Collection[TokenType]] = None, 5424 ) -> t.Optional[exp.Expression]: 5425 identifier = self._parse_identifier() 5426 5427 if identifier: 5428 return identifier 5429 
5430 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 5431 quoted = self._prev.token_type == TokenType.STRING 5432 return exp.Identifier(this=self._prev.text, quoted=quoted) 5433 5434 return None 5435 5436 def _parse_string(self) -> t.Optional[exp.Expression]: 5437 if self._match_set(self.STRING_PARSERS): 5438 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5439 return self._parse_placeholder() 5440 5441 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5442 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5443 5444 def _parse_number(self) -> t.Optional[exp.Expression]: 5445 if self._match_set(self.NUMERIC_PARSERS): 5446 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5447 return self._parse_placeholder() 5448 5449 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5450 if self._match(TokenType.IDENTIFIER): 5451 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5452 return self._parse_placeholder() 5453 5454 def _parse_var( 5455 self, 5456 any_token: bool = False, 5457 tokens: t.Optional[t.Collection[TokenType]] = None, 5458 upper: bool = False, 5459 ) -> t.Optional[exp.Expression]: 5460 if ( 5461 (any_token and self._advance_any()) 5462 or self._match(TokenType.VAR) 5463 or (self._match_set(tokens) if tokens else False) 5464 ): 5465 return self.expression( 5466 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5467 ) 5468 return self._parse_placeholder() 5469 5470 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5471 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5472 self._advance() 5473 return self._prev 5474 return None 5475 5476 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5477 return self._parse_var() or self._parse_string() 5478 5479 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5480 return self._parse_primary() or self._parse_var(any_token=True) 5481 5482 def _parse_null(self) -> t.Optional[exp.Expression]: 5483 if self._match_set(self.NULL_TOKENS): 5484 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5485 return self._parse_placeholder() 5486 5487 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5488 if self._match(TokenType.TRUE): 5489 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5490 if self._match(TokenType.FALSE): 5491 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5492 return self._parse_placeholder() 5493 5494 def _parse_star(self) -> t.Optional[exp.Expression]: 5495 if self._match(TokenType.STAR): 5496 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5497 return self._parse_placeholder() 5498 5499 def _parse_parameter(self) -> exp.Parameter: 5500 self._match(TokenType.L_BRACE) 5501 this = self._parse_identifier() or self._parse_primary_or_var() 5502 expression = self._match(TokenType.COLON) and ( 5503 self._parse_identifier() or self._parse_primary_or_var() 5504 ) 5505 self._match(TokenType.R_BRACE) 5506 return self.expression(exp.Parameter, this=this, expression=expression) 5507 5508 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5509 if self._match_set(self.PLACEHOLDER_PARSERS): 5510 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5511 if placeholder: 5512 return placeholder 5513 self._advance(-1) 5514 return None 5515 5516 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 
5517 if not self._match(TokenType.EXCEPT): 5518 return None 5519 if self._match(TokenType.L_PAREN, advance=False): 5520 return self._parse_wrapped_csv(self._parse_column) 5521 5522 except_column = self._parse_column() 5523 return [except_column] if except_column else None 5524 5525 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5526 if not self._match(TokenType.REPLACE): 5527 return None 5528 if self._match(TokenType.L_PAREN, advance=False): 5529 return self._parse_wrapped_csv(self._parse_expression) 5530 5531 replace_expression = self._parse_expression() 5532 return [replace_expression] if replace_expression else None 5533 5534 def _parse_csv( 5535 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5536 ) -> t.List[exp.Expression]: 5537 parse_result = parse_method() 5538 items = [parse_result] if parse_result is not None else [] 5539 5540 while self._match(sep): 5541 self._add_comments(parse_result) 5542 parse_result = parse_method() 5543 if parse_result is not None: 5544 items.append(parse_result) 5545 5546 return items 5547 5548 def _parse_tokens( 5549 self, parse_method: t.Callable, expressions: t.Dict 5550 ) -> t.Optional[exp.Expression]: 5551 this = parse_method() 5552 5553 while self._match_set(expressions): 5554 this = self.expression( 5555 expressions[self._prev.token_type], 5556 this=this, 5557 comments=self._prev_comments, 5558 expression=parse_method(), 5559 ) 5560 5561 return this 5562 5563 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5564 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5565 5566 def _parse_wrapped_csv( 5567 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5568 ) -> t.List[exp.Expression]: 5569 return self._parse_wrapped( 5570 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5571 ) 5572 5573 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5574 wrapped = self._match(TokenType.L_PAREN) 5575 if not wrapped and not optional: 5576 self.raise_error("Expecting (") 5577 parse_result = parse_method() 5578 if wrapped: 5579 self._match_r_paren() 5580 return parse_result 5581 5582 def _parse_expressions(self) -> t.List[exp.Expression]: 5583 return self._parse_csv(self._parse_expression) 5584 5585 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5586 return self._parse_select() or self._parse_set_operations( 5587 self._parse_expression() if alias else self._parse_conjunction() 5588 ) 5589 5590 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5591 return self._parse_query_modifiers( 5592 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5593 ) 5594 5595 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5596 this = None 5597 if self._match_texts(self.TRANSACTION_KIND): 5598 this = self._prev.text 5599 5600 self._match_texts(("TRANSACTION", "WORK")) 5601 5602 modes = [] 5603 while True: 5604 mode = [] 5605 while self._match(TokenType.VAR): 5606 mode.append(self._prev.text) 5607 5608 if mode: 5609 modes.append(" ".join(mode)) 5610 if not self._match(TokenType.COMMA): 5611 break 5612 5613 return self.expression(exp.Transaction, this=this, modes=modes) 5614 5615 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5616 chain = None 5617 savepoint = None 5618 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5619 5620 self._match_texts(("TRANSACTION", "WORK")) 5621 5622 if 
self._match_text_seq("TO"): 5623 self._match_text_seq("SAVEPOINT") 5624 savepoint = self._parse_id_var() 5625 5626 if self._match(TokenType.AND): 5627 chain = not self._match_text_seq("NO") 5628 self._match_text_seq("CHAIN") 5629 5630 if is_rollback: 5631 return self.expression(exp.Rollback, savepoint=savepoint) 5632 5633 return self.expression(exp.Commit, chain=chain) 5634 5635 def _parse_refresh(self) -> exp.Refresh: 5636 self._match(TokenType.TABLE) 5637 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5638 5639 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5640 if not self._match_text_seq("ADD"): 5641 return None 5642 5643 self._match(TokenType.COLUMN) 5644 exists_column = self._parse_exists(not_=True) 5645 expression = self._parse_field_def() 5646 5647 if expression: 5648 expression.set("exists", exists_column) 5649 5650 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5651 if self._match_texts(("FIRST", "AFTER")): 5652 position = self._prev.text 5653 column_position = self.expression( 5654 exp.ColumnPosition, this=self._parse_column(), position=position 5655 ) 5656 expression.set("position", column_position) 5657 5658 return expression 5659 5660 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5661 drop = self._match(TokenType.DROP) and self._parse_drop() 5662 if drop and not isinstance(drop, exp.Command): 5663 drop.set("kind", drop.args.get("kind", "COLUMN")) 5664 return drop 5665 5666 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5667 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5668 return self.expression( 5669 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5670 ) 5671 5672 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5673 index = self._index - 1 5674 5675 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5676 return self._parse_csv( 5677 lambda: self.expression( 5678 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5679 ) 5680 ) 5681 5682 self._retreat(index) 5683 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5684 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5685 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5686 5687 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5688 self._match(TokenType.COLUMN) 5689 column = self._parse_field(any_token=True) 5690 5691 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5692 return self.expression(exp.AlterColumn, this=column, drop=True) 5693 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5694 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5695 if self._match(TokenType.COMMENT): 5696 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5697 5698 self._match_text_seq("SET", "DATA") 5699 return self.expression( 5700 exp.AlterColumn, 5701 this=column, 5702 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5703 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5704 using=self._match(TokenType.USING) and self._parse_conjunction(), 5705 ) 5706 5707 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5708 index = self._index - 1 5709 5710 partition_exists = self._parse_exists() 5711 if self._match(TokenType.PARTITION, advance=False): 5712 return self._parse_csv(lambda: 
self._parse_drop_partition(exists=partition_exists)) 5713 5714 self._retreat(index) 5715 return self._parse_csv(self._parse_drop_column) 5716 5717 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5718 if self._match(TokenType.COLUMN): 5719 exists = self._parse_exists() 5720 old_column = self._parse_column() 5721 to = self._match_text_seq("TO") 5722 new_column = self._parse_column() 5723 5724 if old_column is None or to is None or new_column is None: 5725 return None 5726 5727 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5728 5729 self._match_text_seq("TO") 5730 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5731 5732 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5733 start = self._prev 5734 5735 if not self._match(TokenType.TABLE): 5736 return self._parse_as_command(start) 5737 5738 exists = self._parse_exists() 5739 only = self._match_text_seq("ONLY") 5740 this = self._parse_table(schema=True) 5741 5742 if self._next: 5743 self._advance() 5744 5745 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5746 if parser: 5747 actions = ensure_list(parser(self)) 5748 options = self._parse_csv(self._parse_property) 5749 5750 if not self._curr and actions: 5751 return self.expression( 5752 exp.AlterTable, 5753 this=this, 5754 exists=exists, 5755 actions=actions, 5756 only=only, 5757 options=options, 5758 ) 5759 5760 return self._parse_as_command(start) 5761 5762 def _parse_merge(self) -> exp.Merge: 5763 self._match(TokenType.INTO) 5764 target = self._parse_table() 5765 5766 if target and self._match(TokenType.ALIAS, advance=False): 5767 target.set("alias", self._parse_table_alias()) 5768 5769 self._match(TokenType.USING) 5770 using = self._parse_table() 5771 5772 self._match(TokenType.ON) 5773 on = self._parse_conjunction() 5774 5775 return self.expression( 5776 exp.Merge, 5777 this=target, 5778 using=using, 5779 on=on, 5780 expressions=self._parse_when_matched(), 5781 ) 5782 5783 def _parse_when_matched(self) -> t.List[exp.When]: 5784 whens = [] 5785 5786 while self._match(TokenType.WHEN): 5787 matched = not self._match(TokenType.NOT) 5788 self._match_text_seq("MATCHED") 5789 source = ( 5790 False 5791 if self._match_text_seq("BY", "TARGET") 5792 else self._match_text_seq("BY", "SOURCE") 5793 ) 5794 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5795 5796 self._match(TokenType.THEN) 5797 5798 if self._match(TokenType.INSERT): 5799 _this = self._parse_star() 5800 if _this: 5801 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5802 else: 5803 then = self.expression( 5804 exp.Insert, 5805 this=self._parse_value(), 5806 expression=self._match_text_seq("VALUES") and self._parse_value(), 5807 ) 5808 elif self._match(TokenType.UPDATE): 5809 expressions = self._parse_star() 5810 if expressions: 5811 then = self.expression(exp.Update, expressions=expressions) 5812 else: 5813 then = self.expression( 5814 exp.Update, 5815 expressions=self._match(TokenType.SET) 5816 and self._parse_csv(self._parse_equality), 5817 ) 5818 elif self._match(TokenType.DELETE): 5819 then = self.expression(exp.Var, this=self._prev.text) 5820 else: 5821 then = None 5822 5823 whens.append( 5824 self.expression( 5825 exp.When, 5826 matched=matched, 5827 source=source, 5828 condition=condition, 5829 then=then, 5830 ) 5831 ) 5832 return whens 5833 5834 def _parse_show(self) -> t.Optional[exp.Expression]: 5835 parser = 
self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5836 if parser: 5837 return parser(self) 5838 return self._parse_as_command(self._prev) 5839 5840 def _parse_set_item_assignment( 5841 self, kind: t.Optional[str] = None 5842 ) -> t.Optional[exp.Expression]: 5843 index = self._index 5844 5845 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5846 return self._parse_set_transaction(global_=kind == "GLOBAL") 5847 5848 left = self._parse_primary() or self._parse_id_var() 5849 assignment_delimiter = self._match_texts(("=", "TO")) 5850 5851 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5852 self._retreat(index) 5853 return None 5854 5855 right = self._parse_statement() or self._parse_id_var() 5856 this = self.expression(exp.EQ, this=left, expression=right) 5857 5858 return self.expression(exp.SetItem, this=this, kind=kind) 5859 5860 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5861 self._match_text_seq("TRANSACTION") 5862 characteristics = self._parse_csv( 5863 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5864 ) 5865 return self.expression( 5866 exp.SetItem, 5867 expressions=characteristics, 5868 kind="TRANSACTION", 5869 **{"global": global_}, # type: ignore 5870 ) 5871 5872 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5873 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5874 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5875 5876 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5877 index = self._index 5878 set_ = self.expression( 5879 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5880 ) 5881 5882 if self._curr: 5883 self._retreat(index) 5884 return self._parse_as_command(self._prev) 5885 5886 return set_ 5887 5888 def _parse_var_from_options( 5889 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 5890 ) -> t.Optional[exp.Var]: 5891 start = self._curr 5892 if not start: 5893 return None 5894 5895 option = start.text.upper() 5896 continuations = options.get(option) 5897 5898 index = self._index 5899 self._advance() 5900 for keywords in continuations or []: 5901 if isinstance(keywords, str): 5902 keywords = (keywords,) 5903 5904 if self._match_text_seq(*keywords): 5905 option = f"{option} {' '.join(keywords)}" 5906 break 5907 else: 5908 if continuations or continuations is None: 5909 if raise_unmatched: 5910 self.raise_error(f"Unknown option {option}") 5911 5912 self._retreat(index) 5913 return None 5914 5915 return exp.var(option) 5916 5917 def _parse_as_command(self, start: Token) -> exp.Command: 5918 while self._curr: 5919 self._advance() 5920 text = self._find_sql(start, self._prev) 5921 size = len(start.text) 5922 self._warn_unsupported() 5923 return exp.Command(this=text[:size], expression=text[size:]) 5924 5925 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5926 settings = [] 5927 5928 self._match_l_paren() 5929 kind = self._parse_id_var() 5930 5931 if self._match(TokenType.L_PAREN): 5932 while True: 5933 key = self._parse_id_var() 5934 value = self._parse_primary() 5935 5936 if not key and value is None: 5937 break 5938 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5939 self._match(TokenType.R_PAREN) 5940 5941 self._match_r_paren() 5942 5943 return self.expression( 5944 exp.DictProperty, 5945 this=this, 5946 kind=kind.this if kind else None, 5947 settings=settings, 5948 ) 5949 5950 def 
_parse_dict_range(self, this: str) -> exp.DictRange: 5951 self._match_l_paren() 5952 has_min = self._match_text_seq("MIN") 5953 if has_min: 5954 min = self._parse_var() or self._parse_primary() 5955 self._match_text_seq("MAX") 5956 max = self._parse_var() or self._parse_primary() 5957 else: 5958 max = self._parse_var() or self._parse_primary() 5959 min = exp.Literal.number(0) 5960 self._match_r_paren() 5961 return self.expression(exp.DictRange, this=this, min=min, max=max) 5962 5963 def _parse_comprehension( 5964 self, this: t.Optional[exp.Expression] 5965 ) -> t.Optional[exp.Comprehension]: 5966 index = self._index 5967 expression = self._parse_column() 5968 if not self._match(TokenType.IN): 5969 self._retreat(index - 1) 5970 return None 5971 iterator = self._parse_column() 5972 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5973 return self.expression( 5974 exp.Comprehension, 5975 this=this, 5976 expression=expression, 5977 iterator=iterator, 5978 condition=condition, 5979 ) 5980 5981 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 5982 if self._match(TokenType.HEREDOC_STRING): 5983 return self.expression(exp.Heredoc, this=self._prev.text) 5984 5985 if not self._match_text_seq("$"): 5986 return None 5987 5988 tags = ["$"] 5989 tag_text = None 5990 5991 if self._is_connected(): 5992 self._advance() 5993 tags.append(self._prev.text.upper()) 5994 else: 5995 self.raise_error("No closing $ found") 5996 5997 if tags[-1] != "$": 5998 if self._is_connected() and self._match_text_seq("$"): 5999 tag_text = tags[-1] 6000 tags.append("$") 6001 else: 6002 self.raise_error("No closing $ found") 6003 6004 heredoc_start = self._curr 6005 6006 while self._curr: 6007 if self._match_text_seq(*tags, advance=False): 6008 this = self._find_sql(heredoc_start, self._prev) 6009 self._advance(len(tags)) 6010 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6011 6012 self._advance() 6013 6014 self.raise_error(f"No closing {''.join(tags)} found") 6015 return None 6016 6017 def _find_parser( 6018 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6019 ) -> t.Optional[t.Callable]: 6020 if not self._curr: 6021 return None 6022 6023 index = self._index 6024 this = [] 6025 while True: 6026 # The current token might be multiple words 6027 curr = self._curr.text.upper() 6028 key = curr.split(" ") 6029 this.append(curr) 6030 6031 self._advance() 6032 result, trie = in_trie(trie, key) 6033 if result == TrieResult.FAILED: 6034 break 6035 6036 if result == TrieResult.EXISTS: 6037 subparser = parsers[" ".join(this)] 6038 return subparser 6039 6040 self._retreat(index) 6041 return None 6042 6043 def _match(self, token_type, advance=True, expression=None): 6044 if not self._curr: 6045 return None 6046 6047 if self._curr.token_type == token_type: 6048 if advance: 6049 self._advance() 6050 self._add_comments(expression) 6051 return True 6052 6053 return None 6054 6055 def _match_set(self, types, advance=True): 6056 if not self._curr: 6057 return None 6058 6059 if self._curr.token_type in types: 6060 if advance: 6061 self._advance() 6062 return True 6063 6064 return None 6065 6066 def _match_pair(self, token_type_a, token_type_b, advance=True): 6067 if not self._curr or not self._next: 6068 return None 6069 6070 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6071 if advance: 6072 self._advance(2) 6073 return True 6074 6075 return None 6076 6077 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6078 if not 
self._match(TokenType.L_PAREN, expression=expression): 6079 self.raise_error("Expecting (") 6080 6081 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6082 if not self._match(TokenType.R_PAREN, expression=expression): 6083 self.raise_error("Expecting )") 6084 6085 def _match_texts(self, texts, advance=True): 6086 if self._curr and self._curr.text.upper() in texts: 6087 if advance: 6088 self._advance() 6089 return True 6090 return None 6091 6092 def _match_text_seq(self, *texts, advance=True): 6093 index = self._index 6094 for text in texts: 6095 if self._curr and self._curr.text.upper() == text: 6096 self._advance() 6097 else: 6098 self._retreat(index) 6099 return None 6100 6101 if not advance: 6102 self._retreat(index) 6103 6104 return True 6105 6106 def _replace_lambda( 6107 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6108 ) -> t.Optional[exp.Expression]: 6109 if not node: 6110 return node 6111 6112 for column in node.find_all(exp.Column): 6113 if column.parts[0].name in lambda_variables: 6114 dot_or_id = column.to_dot() if column.table else column.this 6115 parent = column.parent 6116 6117 while isinstance(parent, exp.Dot): 6118 if not isinstance(parent.parent, exp.Dot): 6119 parent.replace(dot_or_id) 6120 break 6121 parent = parent.parent 6122 else: 6123 if column is node: 6124 node = dot_or_id 6125 else: 6126 column.replace(dot_or_id) 6127 return node 6128 6129 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6130 start = self._prev 6131 6132 # Not to be confused with TRUNCATE(number, decimals) function call 6133 if self._match(TokenType.L_PAREN): 6134 self._retreat(self._index - 2) 6135 return self._parse_function() 6136 6137 # Clickhouse supports TRUNCATE DATABASE as well 6138 is_database = self._match(TokenType.DATABASE) 6139 6140 self._match(TokenType.TABLE) 6141 6142 exists = self._parse_exists(not_=False) 6143 6144 expressions = self._parse_csv( 6145 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6146 ) 6147 6148 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6149 6150 if self._match_text_seq("RESTART", "IDENTITY"): 6151 identity = "RESTART" 6152 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6153 identity = "CONTINUE" 6154 else: 6155 identity = None 6156 6157 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6158 option = self._prev.text 6159 else: 6160 option = None 6161 6162 partition = self._parse_partition() 6163 6164 # Fallback case 6165 if self._curr: 6166 return self._parse_as_command(start) 6167 6168 return self.expression( 6169 exp.TruncateTable, 6170 expressions=expressions, 6171 is_database=is_database, 6172 exists=exists, 6173 cluster=cluster, 6174 identity=identity, 6175 option=option, 6176 partition=partition, 6177 ) 6178 6179 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6180 this = self._parse_ordered(self._parse_opclass) 6181 6182 if not self._match(TokenType.WITH): 6183 return this 6184 6185 op = self._parse_var(any_token=True) 6186 6187 return self.expression(exp.WithOperator, this=this, op=op)
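One behavior in the listing above that benefits from a concrete illustration: _parse_decode rewrites the search/result variant of DECODE into a CASE expression, with explicit IS NULL checks for NULL search values. A minimal sketch through the public API, assuming the default dialect routes DECODE to this parser (the routing table itself is not shown here):

    from sqlglot import exp, parse_one

    tree = parse_one("SELECT DECODE(x, 1, 'one', 'other') FROM t")
    # The multi-argument form comes back as an exp.Case tree, roughly:
    # CASE WHEN x = 1 THEN 'one' ELSE 'other' END
    print(tree.find(exp.Case).sql())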
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
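For instance, a parser that accumulates several errors before raising might be constructed as follows (a minimal sketch; the "duckdb" dialect name is just an example):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# Collect up to 5 errors and raise them together instead of failing fast.
parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5, dialect="duckdb")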
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    dialect: DialectType = None,
):
    from sqlglot.dialects import Dialect

    self.error_level = error_level or ErrorLevel.IMMEDIATE
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    self.dialect = Dialect.get_or_raise(dialect)
    self.reset()
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens and returns a list of syntax trees, one tree
    per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    return self._parse(
        parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
    )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
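In practice the tokens come from the Tokenizer; a minimal sketch of driving parse by hand (the top-level sqlglot.parse helper wraps these two steps for you):

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1; SELECT 2"
tokens = Tokenizer().tokenize(sql)
trees = Parser().parse(tokens, sql)  # one tree per statement, so len(trees) == 2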
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
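A sketch of forcing a particular interpretation, assuming exp.Select is among the keys registered in EXPRESSION_PARSERS (it is in current sqlglot releases):

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t"
trees = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql)

# An incompatible target type surfaces as a ParseError whose error records
# carry an "into_expression" key identifying the attempted type.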
def check_errors(self) -> None:
    """Logs or raises any found errors, depending on the chosen error level setting."""
    if self.error_level == ErrorLevel.WARN:
        for error in self.errors:
            logger.error(str(error))
    elif self.error_level == ErrorLevel.RAISE and self.errors:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )
Logs or raises any found errors, depending on the chosen error level setting.
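Since check_errors is also invoked at the end of a parse pass, the error level mostly determines what happens to the errors accumulated while parsing. A sketch (the malformed SQL string is just an example):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT * FROM"  # malformed on purpose
parser = Parser(error_level=ErrorLevel.WARN)
parser.parse(Tokenizer().tokenize(sql), sql)  # errors are logged, not raised
parser.check_errors()  # re-logs the recorded errors; returns None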
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error in the list of recorded errors or raises it, depending on the chosen
    error level setting.
    """
    token = token or self._curr or self._prev or Token.string("")
    start = token.start
    end = token.end + 1
    start_context = self.sql[max(start - self.error_message_context, 0) : start]
    highlight = self.sql[start:end]
    end_context = self.sql[end : end + self.error_message_context]

    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
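The structured fields packed into ParseError.new above can be read back from a caught error; a minimal sketch:

from sqlglot.errors import ErrorLevel, ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT * FROM"
try:
    Parser(error_level=ErrorLevel.RAISE).parse(Tokenizer().tokenize(sql), sql)
except ParseError as e:
    for record in e.errors:  # one dict per recorded error
        print(record["line"], record["col"], record["description"])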
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)
    instance.add_comments(comments) if comments else self._add_comments(instance)
    return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
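This is the factory every _parse_* method above goes through, e.g. self.expression(exp.DictRange, this=this, min=min, max=max): comments pending on the parser are attached to the new node and validation runs immediately. A sketch of a direct call (the argument values are just examples):

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()  # ErrorLevel.IMMEDIATE by default

# All mandatory args present: returns a validated exp.DictRange node.
node = parser.expression(
    exp.DictRange,
    this="populations",
    min=exp.Literal.number(0),
    max=exp.Literal.number(100),
)

# A missing mandatory arg would make validate_expression raise a ParseError here.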
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression, making sure that all its mandatory arguments are set.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The validated expression.
    """
    if self.error_level != ErrorLevel.IGNORE:
        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
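A sketch of the interaction with the error level; exp.Like without operands is just a convenient invalid node:

from sqlglot import exp
from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

incomplete = exp.Like()  # missing its mandatory "this" and "expression" args

# With IGNORE, validation is skipped and the node is returned as-is.
Parser(error_level=ErrorLevel.IGNORE).validate_expression(incomplete)

# With the default IMMEDIATE level, the first missing-argument message raises.
Parser().validate_expression(incomplete)  # raises ParseError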