sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
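

# Illustrative sketch (not part of the module): the builders above normalize SQL
# function arguments into canonical expression shapes. Assuming a dialect where
# LOG_BASE_FIRST is True (the default), something like
#
#   >>> import sqlglot
#   >>> sqlglot.parse_one("LOG(2, x)")
#
# yields exp.Log(this=2, expression=x), while a dialect with LOG_BASE_FIRST set
# to False would swap the two arguments via build_logarithm.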


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
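
    # Illustrative usage (a sketch, not part of the class): tokenize first, then
    # hand the tokens to a Parser instance, e.g.
    #
    #   >>> parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5)
    #   >>> parser.parse(Tokenizer().tokenize("SELECT 1"))
    #
    # which returns one syntax tree per semicolon-separated statement.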

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "MOD": lambda args: exp.Mod(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.MODEL,
        TokenType.DICTIONARY,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.FOREIGN_KEY,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.COLON_EQ: exp.PropertyEQ,
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
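
    # Illustrative sketch (not part of the class): COLUMN_OPERATORS drives the
    # postfix operators parsed after a column, so with this default table a
    # query like
    #
    #   SELECT col::INT, doc -> '$.k' FROM t
    #
    # maps '::' to exp.Cast (since STRICT_CAST is True here) and '->' to
    # exp.JSONExtract, with the path normalized by dialect.to_json_path.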

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_conjunction),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }
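
    # Illustrative sketch (not part of the class): OPTIONS_TYPE tables map a
    # leading keyword to the token sequences allowed after it, so the table
    # above accepts statements such as
    #
    #   SET TRANSACTION ISOLATION LEVEL READ COMMITTED
    #   SET TRANSACTION READ ONLY
    #
    # when matched through helpers like _parse_var_from_options.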

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. SELECT COUNT(*) 'count'
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
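
    # Illustrative sketch (not part of the class): parse_into targets one of the
    # EXPRESSION_PARSERS keys, e.g. parsing a bare condition instead of a full
    # statement:
    #
    #   >>> Parser().parse_into(exp.Condition, Tokenizer().tokenize("x > 1"))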

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
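
    # Illustrative sketch (not part of the class) of the error levels handled
    # above: ErrorLevel.WARN logs each error, ErrorLevel.RAISE collects errors
    # and raises them together from check_errors, and ErrorLevel.IMMEDIATE (the
    # default) raises from raise_error on the first problem, e.g.
    #
    #   >>> Parser().parse(Tokenizer().tokenize("SELECT * FROM"))
    #
    # would be expected to raise a ParseError pointing at the missing table.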

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an
        error. This behavior can be different depending on the user-set ErrorLevel, so _try_parse
        aims to solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this
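
    # Illustrative sketch (not part of the class): _try_parse enables speculative
    # parsing, e.g. a hypothetical call like
    #
    #   this = self._try_parse(self._parse_types, retreat=True)
    #
    # temporarily forces ErrorLevel.IMMEDIATE so a failed attempt raises, is
    # caught, and the token index is rewound via _retreat.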
This behavior can 1360 be different depending on the uset-set ErrorLevel, so _try_parse aims to solve this by setting & resetting 1361 the parser state accordingly 1362 """ 1363 index = self._index 1364 error_level = self.error_level 1365 1366 self.error_level = ErrorLevel.IMMEDIATE 1367 try: 1368 this = parse_method() 1369 except ParseError: 1370 this = None 1371 finally: 1372 if not this or retreat: 1373 self._retreat(index) 1374 self.error_level = error_level 1375 1376 return this 1377 1378 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1379 start = self._prev 1380 exists = self._parse_exists() if allow_exists else None 1381 1382 self._match(TokenType.ON) 1383 1384 kind = self._match_set(self.CREATABLES) and self._prev 1385 if not kind: 1386 return self._parse_as_command(start) 1387 1388 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1389 this = self._parse_user_defined_function(kind=kind.token_type) 1390 elif kind.token_type == TokenType.TABLE: 1391 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1392 elif kind.token_type == TokenType.COLUMN: 1393 this = self._parse_column() 1394 else: 1395 this = self._parse_id_var() 1396 1397 self._match(TokenType.IS) 1398 1399 return self.expression( 1400 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1401 ) 1402 1403 def _parse_to_table( 1404 self, 1405 ) -> exp.ToTableProperty: 1406 table = self._parse_table_parts(schema=True) 1407 return self.expression(exp.ToTableProperty, this=table) 1408 1409 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1410 def _parse_ttl(self) -> exp.Expression: 1411 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1412 this = self._parse_bitwise() 1413 1414 if self._match_text_seq("DELETE"): 1415 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1416 if self._match_text_seq("RECOMPRESS"): 1417 return self.expression( 1418 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1419 ) 1420 if self._match_text_seq("TO", "DISK"): 1421 return self.expression( 1422 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1423 ) 1424 if self._match_text_seq("TO", "VOLUME"): 1425 return self.expression( 1426 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1427 ) 1428 1429 return this 1430 1431 expressions = self._parse_csv(_parse_ttl_action) 1432 where = self._parse_where() 1433 group = self._parse_group() 1434 1435 aggregates = None 1436 if group and self._match(TokenType.SET): 1437 aggregates = self._parse_csv(self._parse_set_item) 1438 1439 return self.expression( 1440 exp.MergeTreeTTL, 1441 expressions=expressions, 1442 where=where, 1443 group=group, 1444 aggregates=aggregates, 1445 ) 1446 1447 def _parse_statement(self) -> t.Optional[exp.Expression]: 1448 if self._curr is None: 1449 return None 1450 1451 if self._match_set(self.STATEMENT_PARSERS): 1452 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1453 1454 if self._match_set(Tokenizer.COMMANDS): 1455 return self._parse_command() 1456 1457 expression = self._parse_expression() 1458 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1459 return self._parse_query_modifiers(expression) 1460 1461 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1462 start = self._prev 1463 temporary = self._match(TokenType.TEMPORARY) 1464 materialized = self._match_text_seq("MATERIALIZED") 1465 1466 
kind = self._match_set(self.CREATABLES) and self._prev.text 1467 if not kind: 1468 return self._parse_as_command(start) 1469 1470 if_exists = exists or self._parse_exists() 1471 table = self._parse_table_parts( 1472 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1473 ) 1474 1475 if self._match(TokenType.L_PAREN, advance=False): 1476 expressions = self._parse_wrapped_csv(self._parse_types) 1477 else: 1478 expressions = None 1479 1480 return self.expression( 1481 exp.Drop, 1482 comments=start.comments, 1483 exists=if_exists, 1484 this=table, 1485 expressions=expressions, 1486 kind=kind, 1487 temporary=temporary, 1488 materialized=materialized, 1489 cascade=self._match_text_seq("CASCADE"), 1490 constraints=self._match_text_seq("CONSTRAINTS"), 1491 purge=self._match_text_seq("PURGE"), 1492 ) 1493 1494 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1495 return ( 1496 self._match_text_seq("IF") 1497 and (not not_ or self._match(TokenType.NOT)) 1498 and self._match(TokenType.EXISTS) 1499 ) 1500 1501 def _parse_create(self) -> exp.Create | exp.Command: 1502 # Note: this can't be None because we've matched a statement parser 1503 start = self._prev 1504 comments = self._prev_comments 1505 1506 replace = ( 1507 start.token_type == TokenType.REPLACE 1508 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1509 or self._match_pair(TokenType.OR, TokenType.ALTER) 1510 ) 1511 1512 unique = self._match(TokenType.UNIQUE) 1513 1514 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1515 self._advance() 1516 1517 properties = None 1518 create_token = self._match_set(self.CREATABLES) and self._prev 1519 1520 if not create_token: 1521 # exp.Properties.Location.POST_CREATE 1522 properties = self._parse_properties() 1523 create_token = self._match_set(self.CREATABLES) and self._prev 1524 1525 if not properties or not create_token: 1526 return self._parse_as_command(start) 1527 1528 exists = self._parse_exists(not_=True) 1529 this = None 1530 expression: t.Optional[exp.Expression] = None 1531 indexes = None 1532 no_schema_binding = None 1533 begin = None 1534 end = None 1535 clone = None 1536 1537 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1538 nonlocal properties 1539 if properties and temp_props: 1540 properties.expressions.extend(temp_props.expressions) 1541 elif temp_props: 1542 properties = temp_props 1543 1544 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1545 this = self._parse_user_defined_function(kind=create_token.token_type) 1546 1547 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1548 extend_props(self._parse_properties()) 1549 1550 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1551 1552 if not expression: 1553 if self._match(TokenType.COMMAND): 1554 expression = self._parse_as_command(self._prev) 1555 else: 1556 begin = self._match(TokenType.BEGIN) 1557 return_ = self._match_text_seq("RETURN") 1558 1559 if self._match(TokenType.STRING, advance=False): 1560 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1561 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1562 expression = self._parse_string() 1563 extend_props(self._parse_properties()) 1564 else: 1565 expression = self._parse_statement() 1566 1567 end = self._match_text_seq("END") 1568 1569 if return_: 1570 expression = self.expression(exp.Return, this=expression) 1571 elif 
create_token.token_type == TokenType.INDEX: 1572 this = self._parse_index(index=self._parse_id_var()) 1573 elif create_token.token_type in self.DB_CREATABLES: 1574 table_parts = self._parse_table_parts( 1575 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1576 ) 1577 1578 # exp.Properties.Location.POST_NAME 1579 self._match(TokenType.COMMA) 1580 extend_props(self._parse_properties(before=True)) 1581 1582 this = self._parse_schema(this=table_parts) 1583 1584 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1585 extend_props(self._parse_properties()) 1586 1587 self._match(TokenType.ALIAS) 1588 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1589 # exp.Properties.Location.POST_ALIAS 1590 extend_props(self._parse_properties()) 1591 1592 if create_token.token_type == TokenType.SEQUENCE: 1593 expression = self._parse_types() 1594 extend_props(self._parse_properties()) 1595 else: 1596 expression = self._parse_ddl_select() 1597 1598 if create_token.token_type == TokenType.TABLE: 1599 # exp.Properties.Location.POST_EXPRESSION 1600 extend_props(self._parse_properties()) 1601 1602 indexes = [] 1603 while True: 1604 index = self._parse_index() 1605 1606 # exp.Properties.Location.POST_INDEX 1607 extend_props(self._parse_properties()) 1608 1609 if not index: 1610 break 1611 else: 1612 self._match(TokenType.COMMA) 1613 indexes.append(index) 1614 elif create_token.token_type == TokenType.VIEW: 1615 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1616 no_schema_binding = True 1617 1618 shallow = self._match_text_seq("SHALLOW") 1619 1620 if self._match_texts(self.CLONE_KEYWORDS): 1621 copy = self._prev.text.lower() == "copy" 1622 clone = self.expression( 1623 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1624 ) 1625 1626 if self._curr: 1627 return self._parse_as_command(start) 1628 1629 return self.expression( 1630 exp.Create, 1631 comments=comments, 1632 this=this, 1633 kind=create_token.text.upper(), 1634 replace=replace, 1635 unique=unique, 1636 expression=expression, 1637 exists=exists, 1638 properties=properties, 1639 indexes=indexes, 1640 no_schema_binding=no_schema_binding, 1641 begin=begin, 1642 end=end, 1643 clone=clone, 1644 ) 1645 1646 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1647 seq = exp.SequenceProperties() 1648 1649 options = [] 1650 index = self._index 1651 1652 while self._curr: 1653 if self._match_text_seq("INCREMENT"): 1654 self._match_text_seq("BY") 1655 self._match_text_seq("=") 1656 seq.set("increment", self._parse_term()) 1657 elif self._match_text_seq("MINVALUE"): 1658 seq.set("minvalue", self._parse_term()) 1659 elif self._match_text_seq("MAXVALUE"): 1660 seq.set("maxvalue", self._parse_term()) 1661 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1662 self._match_text_seq("=") 1663 seq.set("start", self._parse_term()) 1664 elif self._match_text_seq("CACHE"): 1665 # T-SQL allows empty CACHE which is initialized dynamically 1666 seq.set("cache", self._parse_number() or True) 1667 elif self._match_text_seq("OWNED", "BY"): 1668 # "OWNED BY NONE" is the default 1669 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1670 else: 1671 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1672 if opt: 1673 options.append(opt) 1674 else: 1675 break 1676 1677 seq.set("options", options if options else None) 1678 return None if self._index == index else seq 1679 1680 def 
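
    # Illustrative sketch (not part of the class): the CREATE path above builds
    # an exp.Create node, e.g.
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("CREATE TABLE t (x INT)")
    #
    # yields exp.Create(kind='TABLE') whose `this` is the schema: the table name
    # plus its column definitions.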
_parse_property_before(self) -> t.Optional[exp.Expression]: 1681 # only used for teradata currently 1682 self._match(TokenType.COMMA) 1683 1684 kwargs = { 1685 "no": self._match_text_seq("NO"), 1686 "dual": self._match_text_seq("DUAL"), 1687 "before": self._match_text_seq("BEFORE"), 1688 "default": self._match_text_seq("DEFAULT"), 1689 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1690 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1691 "after": self._match_text_seq("AFTER"), 1692 "minimum": self._match_texts(("MIN", "MINIMUM")), 1693 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1694 } 1695 1696 if self._match_texts(self.PROPERTY_PARSERS): 1697 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1698 try: 1699 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1700 except TypeError: 1701 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1702 1703 return None 1704 1705 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1706 return self._parse_wrapped_csv(self._parse_property) 1707 1708 def _parse_property(self) -> t.Optional[exp.Expression]: 1709 if self._match_texts(self.PROPERTY_PARSERS): 1710 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1711 1712 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1713 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1714 1715 if self._match_text_seq("COMPOUND", "SORTKEY"): 1716 return self._parse_sortkey(compound=True) 1717 1718 if self._match_text_seq("SQL", "SECURITY"): 1719 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1720 1721 index = self._index 1722 key = self._parse_column() 1723 1724 if not self._match(TokenType.EQ): 1725 self._retreat(index) 1726 return self._parse_sequence_properties() 1727 1728 return self.expression( 1729 exp.Property, 1730 this=key.to_dot() if isinstance(key, exp.Column) else key, 1731 value=self._parse_column() or self._parse_var(any_token=True), 1732 ) 1733 1734 def _parse_stored(self) -> exp.FileFormatProperty: 1735 self._match(TokenType.ALIAS) 1736 1737 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1738 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1739 1740 return self.expression( 1741 exp.FileFormatProperty, 1742 this=( 1743 self.expression( 1744 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1745 ) 1746 if input_format or output_format 1747 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1748 ), 1749 ) 1750 1751 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1752 self._match(TokenType.EQ) 1753 self._match(TokenType.ALIAS) 1754 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1755 1756 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1757 properties = [] 1758 while True: 1759 if before: 1760 prop = self._parse_property_before() 1761 else: 1762 prop = self._parse_property() 1763 if not prop: 1764 break 1765 for p in ensure_list(prop): 1766 properties.append(p) 1767 1768 if properties: 1769 return self.expression(exp.Properties, expressions=properties) 1770 1771 return None 1772 1773 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1774 return self.expression( 1775 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1776 ) 1777 1778 def _parse_volatile_property(self) 
-> exp.VolatileProperty | exp.StabilityProperty: 1779 if self._index >= 2: 1780 pre_volatile_token = self._tokens[self._index - 2] 1781 else: 1782 pre_volatile_token = None 1783 1784 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1785 return exp.VolatileProperty() 1786 1787 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1788 1789 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1790 self._match_pair(TokenType.EQ, TokenType.ON) 1791 1792 prop = self.expression(exp.WithSystemVersioningProperty) 1793 if self._match(TokenType.L_PAREN): 1794 self._match_text_seq("HISTORY_TABLE", "=") 1795 prop.set("this", self._parse_table_parts()) 1796 1797 if self._match(TokenType.COMMA): 1798 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1799 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1800 1801 self._match_r_paren() 1802 1803 return prop 1804 1805 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1806 if self._match(TokenType.L_PAREN, advance=False): 1807 return self._parse_wrapped_properties() 1808 1809 if self._match_text_seq("JOURNAL"): 1810 return self._parse_withjournaltable() 1811 1812 if self._match_texts(self.VIEW_ATTRIBUTES): 1813 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1814 1815 if self._match_text_seq("DATA"): 1816 return self._parse_withdata(no=False) 1817 elif self._match_text_seq("NO", "DATA"): 1818 return self._parse_withdata(no=True) 1819 1820 if not self._next: 1821 return None 1822 1823 return self._parse_withisolatedloading() 1824 1825 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1826 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1827 self._match(TokenType.EQ) 1828 1829 user = self._parse_id_var() 1830 self._match(TokenType.PARAMETER) 1831 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1832 1833 if not user or not host: 1834 return None 1835 1836 return exp.DefinerProperty(this=f"{user}@{host}") 1837 1838 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1839 self._match(TokenType.TABLE) 1840 self._match(TokenType.EQ) 1841 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1842 1843 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1844 return self.expression(exp.LogProperty, no=no) 1845 1846 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1847 return self.expression(exp.JournalProperty, **kwargs) 1848 1849 def _parse_checksum(self) -> exp.ChecksumProperty: 1850 self._match(TokenType.EQ) 1851 1852 on = None 1853 if self._match(TokenType.ON): 1854 on = True 1855 elif self._match_text_seq("OFF"): 1856 on = False 1857 1858 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1859 1860 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 1861 return self.expression( 1862 exp.Cluster, 1863 expressions=( 1864 self._parse_wrapped_csv(self._parse_ordered) 1865 if wrapped 1866 else self._parse_csv(self._parse_ordered) 1867 ), 1868 ) 1869 1870 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1871 self._match_text_seq("BY") 1872 1873 self._match_l_paren() 1874 expressions = self._parse_csv(self._parse_column) 1875 self._match_r_paren() 1876 1877 if self._match_text_seq("SORTED", "BY"): 1878 self._match_l_paren() 1879 sorted_by = self._parse_csv(self._parse_ordered) 1880 self._match_r_paren() 1881 
else: 1882 sorted_by = None 1883 1884 self._match(TokenType.INTO) 1885 buckets = self._parse_number() 1886 self._match_text_seq("BUCKETS") 1887 1888 return self.expression( 1889 exp.ClusteredByProperty, 1890 expressions=expressions, 1891 sorted_by=sorted_by, 1892 buckets=buckets, 1893 ) 1894 1895 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1896 if not self._match_text_seq("GRANTS"): 1897 self._retreat(self._index - 1) 1898 return None 1899 1900 return self.expression(exp.CopyGrantsProperty) 1901 1902 def _parse_freespace(self) -> exp.FreespaceProperty: 1903 self._match(TokenType.EQ) 1904 return self.expression( 1905 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1906 ) 1907 1908 def _parse_mergeblockratio( 1909 self, no: bool = False, default: bool = False 1910 ) -> exp.MergeBlockRatioProperty: 1911 if self._match(TokenType.EQ): 1912 return self.expression( 1913 exp.MergeBlockRatioProperty, 1914 this=self._parse_number(), 1915 percent=self._match(TokenType.PERCENT), 1916 ) 1917 1918 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1919 1920 def _parse_datablocksize( 1921 self, 1922 default: t.Optional[bool] = None, 1923 minimum: t.Optional[bool] = None, 1924 maximum: t.Optional[bool] = None, 1925 ) -> exp.DataBlocksizeProperty: 1926 self._match(TokenType.EQ) 1927 size = self._parse_number() 1928 1929 units = None 1930 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1931 units = self._prev.text 1932 1933 return self.expression( 1934 exp.DataBlocksizeProperty, 1935 size=size, 1936 units=units, 1937 default=default, 1938 minimum=minimum, 1939 maximum=maximum, 1940 ) 1941 1942 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1943 self._match(TokenType.EQ) 1944 always = self._match_text_seq("ALWAYS") 1945 manual = self._match_text_seq("MANUAL") 1946 never = self._match_text_seq("NEVER") 1947 default = self._match_text_seq("DEFAULT") 1948 1949 autotemp = None 1950 if self._match_text_seq("AUTOTEMP"): 1951 autotemp = self._parse_schema() 1952 1953 return self.expression( 1954 exp.BlockCompressionProperty, 1955 always=always, 1956 manual=manual, 1957 never=never, 1958 default=default, 1959 autotemp=autotemp, 1960 ) 1961 1962 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 1963 index = self._index 1964 no = self._match_text_seq("NO") 1965 concurrent = self._match_text_seq("CONCURRENT") 1966 1967 if not self._match_text_seq("ISOLATED", "LOADING"): 1968 self._retreat(index) 1969 return None 1970 1971 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 1972 return self.expression( 1973 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 1974 ) 1975 1976 def _parse_locking(self) -> exp.LockingProperty: 1977 if self._match(TokenType.TABLE): 1978 kind = "TABLE" 1979 elif self._match(TokenType.VIEW): 1980 kind = "VIEW" 1981 elif self._match(TokenType.ROW): 1982 kind = "ROW" 1983 elif self._match_text_seq("DATABASE"): 1984 kind = "DATABASE" 1985 else: 1986 kind = None 1987 1988 if kind in ("DATABASE", "TABLE", "VIEW"): 1989 this = self._parse_table_parts() 1990 else: 1991 this = None 1992 1993 if self._match(TokenType.FOR): 1994 for_or_in = "FOR" 1995 elif self._match(TokenType.IN): 1996 for_or_in = "IN" 1997 else: 1998 for_or_in = None 1999 2000 if self._match_text_seq("ACCESS"): 2001 lock_type = "ACCESS" 2002 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2003 lock_type = "EXCLUSIVE" 2004 elif 
self._match_text_seq("SHARE"): 2005 lock_type = "SHARE" 2006 elif self._match_text_seq("READ"): 2007 lock_type = "READ" 2008 elif self._match_text_seq("WRITE"): 2009 lock_type = "WRITE" 2010 elif self._match_text_seq("CHECKSUM"): 2011 lock_type = "CHECKSUM" 2012 else: 2013 lock_type = None 2014 2015 override = self._match_text_seq("OVERRIDE") 2016 2017 return self.expression( 2018 exp.LockingProperty, 2019 this=this, 2020 kind=kind, 2021 for_or_in=for_or_in, 2022 lock_type=lock_type, 2023 override=override, 2024 ) 2025 2026 def _parse_partition_by(self) -> t.List[exp.Expression]: 2027 if self._match(TokenType.PARTITION_BY): 2028 return self._parse_csv(self._parse_conjunction) 2029 return [] 2030 2031 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2032 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2033 if self._match_text_seq("MINVALUE"): 2034 return exp.var("MINVALUE") 2035 if self._match_text_seq("MAXVALUE"): 2036 return exp.var("MAXVALUE") 2037 return self._parse_bitwise() 2038 2039 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2040 expression = None 2041 from_expressions = None 2042 to_expressions = None 2043 2044 if self._match(TokenType.IN): 2045 this = self._parse_wrapped_csv(self._parse_bitwise) 2046 elif self._match(TokenType.FROM): 2047 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2048 self._match_text_seq("TO") 2049 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2050 elif self._match_text_seq("WITH", "(", "MODULUS"): 2051 this = self._parse_number() 2052 self._match_text_seq(",", "REMAINDER") 2053 expression = self._parse_number() 2054 self._match_r_paren() 2055 else: 2056 self.raise_error("Failed to parse partition bound spec.") 2057 2058 return self.expression( 2059 exp.PartitionBoundSpec, 2060 this=this, 2061 expression=expression, 2062 from_expressions=from_expressions, 2063 to_expressions=to_expressions, 2064 ) 2065 2066 # https://www.postgresql.org/docs/current/sql-createtable.html 2067 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2068 if not self._match_text_seq("OF"): 2069 self._retreat(self._index - 1) 2070 return None 2071 2072 this = self._parse_table(schema=True) 2073 2074 if self._match(TokenType.DEFAULT): 2075 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2076 elif self._match_text_seq("FOR", "VALUES"): 2077 expression = self._parse_partition_bound_spec() 2078 else: 2079 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2080 2081 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2082 2083 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2084 self._match(TokenType.EQ) 2085 return self.expression( 2086 exp.PartitionedByProperty, 2087 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2088 ) 2089 2090 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2091 if self._match_text_seq("AND", "STATISTICS"): 2092 statistics = True 2093 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2094 statistics = False 2095 else: 2096 statistics = None 2097 2098 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2099 2100 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2101 if self._match_text_seq("SQL"): 2102 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2103 return None 2104 2105 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 
2106 if self._match_text_seq("SQL", "DATA"): 2107 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2108 return None 2109 2110 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2111 if self._match_text_seq("PRIMARY", "INDEX"): 2112 return exp.NoPrimaryIndexProperty() 2113 if self._match_text_seq("SQL"): 2114 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2115 return None 2116 2117 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2118 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2119 return exp.OnCommitProperty() 2120 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2121 return exp.OnCommitProperty(delete=True) 2122 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2123 2124 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2125 if self._match_text_seq("SQL", "DATA"): 2126 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2127 return None 2128 2129 def _parse_distkey(self) -> exp.DistKeyProperty: 2130 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2131 2132 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2133 table = self._parse_table(schema=True) 2134 2135 options = [] 2136 while self._match_texts(("INCLUDING", "EXCLUDING")): 2137 this = self._prev.text.upper() 2138 2139 id_var = self._parse_id_var() 2140 if not id_var: 2141 return None 2142 2143 options.append( 2144 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2145 ) 2146 2147 return self.expression(exp.LikeProperty, this=table, expressions=options) 2148 2149 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2150 return self.expression( 2151 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2152 ) 2153 2154 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2155 self._match(TokenType.EQ) 2156 return self.expression( 2157 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2158 ) 2159 2160 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2161 self._match_text_seq("WITH", "CONNECTION") 2162 return self.expression( 2163 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2164 ) 2165 2166 def _parse_returns(self) -> exp.ReturnsProperty: 2167 value: t.Optional[exp.Expression] 2168 is_table = self._match(TokenType.TABLE) 2169 2170 if is_table: 2171 if self._match(TokenType.LT): 2172 value = self.expression( 2173 exp.Schema, 2174 this="TABLE", 2175 expressions=self._parse_csv(self._parse_struct_types), 2176 ) 2177 if not self._match(TokenType.GT): 2178 self.raise_error("Expecting >") 2179 else: 2180 value = self._parse_schema(exp.var("TABLE")) 2181 else: 2182 value = self._parse_types() 2183 2184 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 2185 2186 def _parse_describe(self) -> exp.Describe: 2187 kind = self._match_set(self.CREATABLES) and self._prev.text 2188 extended = self._match_text_seq("EXTENDED") 2189 this = self._parse_table(schema=True) 2190 properties = self._parse_properties() 2191 expressions = properties.expressions if properties else None 2192 return self.expression( 2193 exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions 2194 ) 2195 2196 def _parse_insert(self) -> exp.Insert: 2197 comments = ensure_list(self._prev_comments) 2198 hint = self._parse_hint() 2199 overwrite = 
self._match(TokenType.OVERWRITE) 2200 ignore = self._match(TokenType.IGNORE) 2201 local = self._match_text_seq("LOCAL") 2202 alternative = None 2203 is_function = None 2204 2205 if self._match_text_seq("DIRECTORY"): 2206 this: t.Optional[exp.Expression] = self.expression( 2207 exp.Directory, 2208 this=self._parse_var_or_string(), 2209 local=local, 2210 row_format=self._parse_row_format(match_row=True), 2211 ) 2212 else: 2213 if self._match(TokenType.OR): 2214 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2215 2216 self._match(TokenType.INTO) 2217 comments += ensure_list(self._prev_comments) 2218 self._match(TokenType.TABLE) 2219 is_function = self._match(TokenType.FUNCTION) 2220 2221 this = self._parse_table(schema=True) if not is_function else self._parse_function() 2222 2223 returning = self._parse_returning() 2224 2225 return self.expression( 2226 exp.Insert, 2227 comments=comments, 2228 hint=hint, 2229 is_function=is_function, 2230 this=this, 2231 by_name=self._match_text_seq("BY", "NAME"), 2232 exists=self._parse_exists(), 2233 partition=self._parse_partition(), 2234 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2235 and self._parse_conjunction(), 2236 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2237 conflict=self._parse_on_conflict(), 2238 returning=returning or self._parse_returning(), 2239 overwrite=overwrite, 2240 alternative=alternative, 2241 ignore=ignore, 2242 ) 2243 2244 def _parse_kill(self) -> exp.Kill: 2245 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2246 2247 return self.expression( 2248 exp.Kill, 2249 this=self._parse_primary(), 2250 kind=kind, 2251 ) 2252 2253 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2254 conflict = self._match_text_seq("ON", "CONFLICT") 2255 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2256 2257 if not conflict and not duplicate: 2258 return None 2259 2260 conflict_keys = None 2261 constraint = None 2262 2263 if conflict: 2264 if self._match_text_seq("ON", "CONSTRAINT"): 2265 constraint = self._parse_id_var() 2266 elif self._match(TokenType.L_PAREN): 2267 conflict_keys = self._parse_csv(self._parse_id_var) 2268 self._match_r_paren() 2269 2270 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2271 if self._prev.token_type == TokenType.UPDATE: 2272 self._match(TokenType.SET) 2273 expressions = self._parse_csv(self._parse_equality) 2274 else: 2275 expressions = None 2276 2277 return self.expression( 2278 exp.OnConflict, 2279 duplicate=duplicate, 2280 expressions=expressions, 2281 action=action, 2282 conflict_keys=conflict_keys, 2283 constraint=constraint, 2284 ) 2285 2286 def _parse_returning(self) -> t.Optional[exp.Returning]: 2287 if not self._match(TokenType.RETURNING): 2288 return None 2289 return self.expression( 2290 exp.Returning, 2291 expressions=self._parse_csv(self._parse_expression), 2292 into=self._match(TokenType.INTO) and self._parse_table_part(), 2293 ) 2294 2295 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2296 if not self._match(TokenType.FORMAT): 2297 return None 2298 return self._parse_row_format() 2299 2300 def _parse_row_format( 2301 self, match_row: bool = False 2302 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2303 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2304 return None 2305 2306 if self._match_text_seq("SERDE"): 2307 this = self._parse_string() 2308 2309 
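# The SERDE class-name string has just been consumed; the block below then optionally picks up
# Hive's WITH SERDEPROPERTIES ('key' = 'value', ...) clause. A hedged round-trip sketch of the
# syntax this targets:
# >>> import sqlglot
# >>> sqlglot.parse_one("CREATE TABLE t (c INT) ROW FORMAT SERDE 'x.Y'", read="hive").sql("hive")
# "CREATE TABLE t (c INT) ROW FORMAT SERDE 'x.Y'"  # expected to round-trip unchanged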
serde_properties = None 2310 if self._match(TokenType.SERDE_PROPERTIES): 2311 serde_properties = self.expression( 2312 exp.SerdeProperties, expressions=self._parse_wrapped_properties() 2313 ) 2314 2315 return self.expression( 2316 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2317 ) 2318 2319 self._match_text_seq("DELIMITED") 2320 2321 kwargs = {} 2322 2323 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2324 kwargs["fields"] = self._parse_string() 2325 if self._match_text_seq("ESCAPED", "BY"): 2326 kwargs["escaped"] = self._parse_string() 2327 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2328 kwargs["collection_items"] = self._parse_string() 2329 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2330 kwargs["map_keys"] = self._parse_string() 2331 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2332 kwargs["lines"] = self._parse_string() 2333 if self._match_text_seq("NULL", "DEFINED", "AS"): 2334 kwargs["null"] = self._parse_string() 2335 2336 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2337 2338 def _parse_load(self) -> exp.LoadData | exp.Command: 2339 if self._match_text_seq("DATA"): 2340 local = self._match_text_seq("LOCAL") 2341 self._match_text_seq("INPATH") 2342 inpath = self._parse_string() 2343 overwrite = self._match(TokenType.OVERWRITE) 2344 self._match_pair(TokenType.INTO, TokenType.TABLE) 2345 2346 return self.expression( 2347 exp.LoadData, 2348 this=self._parse_table(schema=True), 2349 local=local, 2350 overwrite=overwrite, 2351 inpath=inpath, 2352 partition=self._parse_partition(), 2353 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2354 serde=self._match_text_seq("SERDE") and self._parse_string(), 2355 ) 2356 return self._parse_as_command(self._prev) 2357 2358 def _parse_delete(self) -> exp.Delete: 2359 # This handles MySQL's "Multiple-Table Syntax" 2360 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2361 tables = None 2362 comments = self._prev_comments 2363 if not self._match(TokenType.FROM, advance=False): 2364 tables = self._parse_csv(self._parse_table) or None 2365 2366 returning = self._parse_returning() 2367 2368 return self.expression( 2369 exp.Delete, 2370 comments=comments, 2371 tables=tables, 2372 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2373 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2374 where=self._parse_where(), 2375 returning=returning or self._parse_returning(), 2376 limit=self._parse_limit(), 2377 ) 2378 2379 def _parse_update(self) -> exp.Update: 2380 comments = self._prev_comments 2381 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2382 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2383 returning = self._parse_returning() 2384 return self.expression( 2385 exp.Update, 2386 comments=comments, 2387 **{ # type: ignore 2388 "this": this, 2389 "expressions": expressions, 2390 "from": self._parse_from(joins=True), 2391 "where": self._parse_where(), 2392 "returning": returning or self._parse_returning(), 2393 "order": self._parse_order(), 2394 "limit": self._parse_limit(), 2395 }, 2396 ) 2397 2398 def _parse_uncache(self) -> exp.Uncache: 2399 if not self._match(TokenType.TABLE): 2400 self.raise_error("Expecting TABLE after UNCACHE") 2401 2402 return self.expression( 2403 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2404 ) 2405 2406 def _parse_cache(self) -> exp.Cache: 
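# Spark-style CACHE statements: CACHE [LAZY] TABLE t [OPTIONS ('k' = 'v')] [AS SELECT ...].
# Note that the OPTIONS handling below only captures a single key/value pair. A hedged sketch,
# assuming CACHE is wired into this parser's statement table:
# >>> import sqlglot
# >>> sqlglot.parse_one("CACHE LAZY TABLE t AS SELECT 1", read="spark")  # expected: an exp.Cache with lazy=True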
2407 lazy = self._match_text_seq("LAZY") 2408 self._match(TokenType.TABLE) 2409 table = self._parse_table(schema=True) 2410 2411 options = [] 2412 if self._match_text_seq("OPTIONS"): 2413 self._match_l_paren() 2414 k = self._parse_string() 2415 self._match(TokenType.EQ) 2416 v = self._parse_string() 2417 options = [k, v] 2418 self._match_r_paren() 2419 2420 self._match(TokenType.ALIAS) 2421 return self.expression( 2422 exp.Cache, 2423 this=table, 2424 lazy=lazy, 2425 options=options, 2426 expression=self._parse_select(nested=True), 2427 ) 2428 2429 def _parse_partition(self) -> t.Optional[exp.Partition]: 2430 if not self._match(TokenType.PARTITION): 2431 return None 2432 2433 return self.expression( 2434 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2435 ) 2436 2437 def _parse_value(self) -> exp.Tuple: 2438 if self._match(TokenType.L_PAREN): 2439 expressions = self._parse_csv(self._parse_expression) 2440 self._match_r_paren() 2441 return self.expression(exp.Tuple, expressions=expressions) 2442 2443 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2444 return self.expression(exp.Tuple, expressions=[self._parse_expression()]) 2445 2446 def _parse_projections(self) -> t.List[exp.Expression]: 2447 return self._parse_expressions() 2448 2449 def _parse_select( 2450 self, 2451 nested: bool = False, 2452 table: bool = False, 2453 parse_subquery_alias: bool = True, 2454 parse_set_operation: bool = True, 2455 ) -> t.Optional[exp.Expression]: 2456 cte = self._parse_with() 2457 2458 if cte: 2459 this = self._parse_statement() 2460 2461 if not this: 2462 self.raise_error("Failed to parse any statement following CTE") 2463 return cte 2464 2465 if "with" in this.arg_types: 2466 this.set("with", cte) 2467 else: 2468 self.raise_error(f"{this.key} does not support CTE") 2469 this = cte 2470 2471 return this 2472 2473 # duckdb supports leading with FROM x 2474 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2475 2476 if self._match(TokenType.SELECT): 2477 comments = self._prev_comments 2478 2479 hint = self._parse_hint() 2480 all_ = self._match(TokenType.ALL) 2481 distinct = self._match_set(self.DISTINCT_TOKENS) 2482 2483 kind = ( 2484 self._match(TokenType.ALIAS) 2485 and self._match_texts(("STRUCT", "VALUE")) 2486 and self._prev.text.upper() 2487 ) 2488 2489 if distinct: 2490 distinct = self.expression( 2491 exp.Distinct, 2492 on=self._parse_value() if self._match(TokenType.ON) else None, 2493 ) 2494 2495 if all_ and distinct: 2496 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2497 2498 limit = self._parse_limit(top=True) 2499 projections = self._parse_projections() 2500 2501 this = self.expression( 2502 exp.Select, 2503 kind=kind, 2504 hint=hint, 2505 distinct=distinct, 2506 expressions=projections, 2507 limit=limit, 2508 ) 2509 this.comments = comments 2510 2511 into = self._parse_into() 2512 if into: 2513 this.set("into", into) 2514 2515 if not from_: 2516 from_ = self._parse_from() 2517 2518 if from_: 2519 this.set("from", from_) 2520 2521 this = self._parse_query_modifiers(this) 2522 elif (table or nested) and self._match(TokenType.L_PAREN): 2523 if self._match(TokenType.PIVOT): 2524 this = self._parse_simplified_pivot() 2525 elif self._match(TokenType.FROM): 2526 this = exp.select("*").from_( 2527 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2528 ) 2529 else: 2530 this = ( 2531 self._parse_table() 2532 if table 2533 else self._parse_select(nested=True, 
parse_set_operation=False) 2534 ) 2535 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2536 2537 self._match_r_paren() 2538 2539 # We return early here so that the UNION isn't attached to the subquery by the 2540 # following call to _parse_set_operations, but instead becomes the parent node 2541 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2542 elif self._match(TokenType.VALUES, advance=False): 2543 this = self._parse_derived_table_values() 2544 elif from_: 2545 this = exp.select("*").from_(from_.this, copy=False) 2546 else: 2547 this = None 2548 2549 if parse_set_operation: 2550 return self._parse_set_operations(this) 2551 return this 2552 2553 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2554 if not skip_with_token and not self._match(TokenType.WITH): 2555 return None 2556 2557 comments = self._prev_comments 2558 recursive = self._match(TokenType.RECURSIVE) 2559 2560 expressions = [] 2561 while True: 2562 expressions.append(self._parse_cte()) 2563 2564 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2565 break 2566 else: 2567 self._match(TokenType.WITH) 2568 2569 return self.expression( 2570 exp.With, comments=comments, expressions=expressions, recursive=recursive 2571 ) 2572 2573 def _parse_cte(self) -> exp.CTE: 2574 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2575 if not alias or not alias.this: 2576 self.raise_error("Expected CTE to have alias") 2577 2578 self._match(TokenType.ALIAS) 2579 2580 if self._match_text_seq("NOT", "MATERIALIZED"): 2581 materialized = False 2582 elif self._match_text_seq("MATERIALIZED"): 2583 materialized = True 2584 else: 2585 materialized = None 2586 2587 return self.expression( 2588 exp.CTE, 2589 this=self._parse_wrapped(self._parse_statement), 2590 alias=alias, 2591 materialized=materialized, 2592 ) 2593 2594 def _parse_table_alias( 2595 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2596 ) -> t.Optional[exp.TableAlias]: 2597 any_token = self._match(TokenType.ALIAS) 2598 alias = ( 2599 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2600 or self._parse_string_as_identifier() 2601 ) 2602 2603 index = self._index 2604 if self._match(TokenType.L_PAREN): 2605 columns = self._parse_csv(self._parse_function_parameter) 2606 self._match_r_paren() if columns else self._retreat(index) 2607 else: 2608 columns = None 2609 2610 if not alias and not columns: 2611 return None 2612 2613 return self.expression(exp.TableAlias, this=alias, columns=columns) 2614 2615 def _parse_subquery( 2616 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2617 ) -> t.Optional[exp.Subquery]: 2618 if not this: 2619 return None 2620 2621 return self.expression( 2622 exp.Subquery, 2623 this=this, 2624 pivots=self._parse_pivots(), 2625 alias=self._parse_table_alias() if parse_alias else None, 2626 ) 2627 2628 def _implicit_unnests_to_explicit(self, this: E) -> E: 2629 from sqlglot.optimizer.normalize_identifiers import ( 2630 normalize_identifiers as _norm, 2631 ) 2632 2633 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2634 for i, join in enumerate(this.args.get("joins") or []): 2635 table = join.this 2636 normalized_table = table.copy() 2637 normalized_table.meta["maybe_column"] = True 2638 normalized_table = _norm(normalized_table, dialect=self.dialect) 2639 2640 if isinstance(table, exp.Table) and not join.args.get("on"): 2641 if normalized_table.parts[0].name in refs: 2642 
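# A single-part join target that names something already in scope (e.g. BigQuery's
# implicit "FROM t, t.arr") is rewritten into an explicit exp.Unnest below.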
table_as_column = table.to_column() 2643 unnest = exp.Unnest(expressions=[table_as_column]) 2644 2645 # Table.to_column creates a parent Alias node that we want to convert to 2646 # a TableAlias and attach to the Unnest, so it matches the parser's output 2647 if isinstance(table.args.get("alias"), exp.TableAlias): 2648 table_as_column.replace(table_as_column.this) 2649 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2650 2651 table.replace(unnest) 2652 2653 refs.add(normalized_table.alias_or_name) 2654 2655 return this 2656 2657 def _parse_query_modifiers( 2658 self, this: t.Optional[exp.Expression] 2659 ) -> t.Optional[exp.Expression]: 2660 if isinstance(this, (exp.Query, exp.Table)): 2661 for join in iter(self._parse_join, None): 2662 this.append("joins", join) 2663 for lateral in iter(self._parse_lateral, None): 2664 this.append("laterals", lateral) 2665 2666 while True: 2667 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2668 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2669 key, expression = parser(self) 2670 2671 if expression: 2672 this.set(key, expression) 2673 if key == "limit": 2674 offset = expression.args.pop("offset", None) 2675 2676 if offset: 2677 offset = exp.Offset(expression=offset) 2678 this.set("offset", offset) 2679 2680 limit_by_expressions = expression.expressions 2681 expression.set("expressions", None) 2682 offset.set("expressions", limit_by_expressions) 2683 continue 2684 break 2685 2686 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2687 this = self._implicit_unnests_to_explicit(this) 2688 2689 return this 2690 2691 def _parse_hint(self) -> t.Optional[exp.Hint]: 2692 if self._match(TokenType.HINT): 2693 hints = [] 2694 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2695 hints.extend(hint) 2696 2697 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2698 self.raise_error("Expected */ after HINT") 2699 2700 return self.expression(exp.Hint, expressions=hints) 2701 2702 return None 2703 2704 def _parse_into(self) -> t.Optional[exp.Into]: 2705 if not self._match(TokenType.INTO): 2706 return None 2707 2708 temp = self._match(TokenType.TEMPORARY) 2709 unlogged = self._match_text_seq("UNLOGGED") 2710 self._match(TokenType.TABLE) 2711 2712 return self.expression( 2713 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2714 ) 2715 2716 def _parse_from( 2717 self, joins: bool = False, skip_from_token: bool = False 2718 ) -> t.Optional[exp.From]: 2719 if not skip_from_token and not self._match(TokenType.FROM): 2720 return None 2721 2722 return self.expression( 2723 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2724 ) 2725 2726 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2727 if not self._match(TokenType.MATCH_RECOGNIZE): 2728 return None 2729 2730 self._match_l_paren() 2731 2732 partition = self._parse_partition_by() 2733 order = self._parse_order() 2734 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2735 2736 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2737 rows = exp.var("ONE ROW PER MATCH") 2738 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2739 text = "ALL ROWS PER MATCH" 2740 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2741 text += " SHOW EMPTY MATCHES" 2742 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2743 text += " OMIT EMPTY MATCHES" 2744 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2745 text += 
" WITH UNMATCHED ROWS" 2746 rows = exp.var(text) 2747 else: 2748 rows = None 2749 2750 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2751 text = "AFTER MATCH SKIP" 2752 if self._match_text_seq("PAST", "LAST", "ROW"): 2753 text += " PAST LAST ROW" 2754 elif self._match_text_seq("TO", "NEXT", "ROW"): 2755 text += " TO NEXT ROW" 2756 elif self._match_text_seq("TO", "FIRST"): 2757 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2758 elif self._match_text_seq("TO", "LAST"): 2759 text += f" TO LAST {self._advance_any().text}" # type: ignore 2760 after = exp.var(text) 2761 else: 2762 after = None 2763 2764 if self._match_text_seq("PATTERN"): 2765 self._match_l_paren() 2766 2767 if not self._curr: 2768 self.raise_error("Expecting )", self._curr) 2769 2770 paren = 1 2771 start = self._curr 2772 2773 while self._curr and paren > 0: 2774 if self._curr.token_type == TokenType.L_PAREN: 2775 paren += 1 2776 if self._curr.token_type == TokenType.R_PAREN: 2777 paren -= 1 2778 2779 end = self._prev 2780 self._advance() 2781 2782 if paren > 0: 2783 self.raise_error("Expecting )", self._curr) 2784 2785 pattern = exp.var(self._find_sql(start, end)) 2786 else: 2787 pattern = None 2788 2789 define = ( 2790 self._parse_csv(self._parse_name_as_expression) 2791 if self._match_text_seq("DEFINE") 2792 else None 2793 ) 2794 2795 self._match_r_paren() 2796 2797 return self.expression( 2798 exp.MatchRecognize, 2799 partition_by=partition, 2800 order=order, 2801 measures=measures, 2802 rows=rows, 2803 after=after, 2804 pattern=pattern, 2805 define=define, 2806 alias=self._parse_table_alias(), 2807 ) 2808 2809 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2810 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2811 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2812 cross_apply = False 2813 2814 if cross_apply is not None: 2815 this = self._parse_select(table=True) 2816 view = None 2817 outer = None 2818 elif self._match(TokenType.LATERAL): 2819 this = self._parse_select(table=True) 2820 view = self._match(TokenType.VIEW) 2821 outer = self._match(TokenType.OUTER) 2822 else: 2823 return None 2824 2825 if not this: 2826 this = ( 2827 self._parse_unnest() 2828 or self._parse_function() 2829 or self._parse_id_var(any_token=False) 2830 ) 2831 2832 while self._match(TokenType.DOT): 2833 this = exp.Dot( 2834 this=this, 2835 expression=self._parse_function() or self._parse_id_var(any_token=False), 2836 ) 2837 2838 if view: 2839 table = self._parse_id_var(any_token=False) 2840 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2841 table_alias: t.Optional[exp.TableAlias] = self.expression( 2842 exp.TableAlias, this=table, columns=columns 2843 ) 2844 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2845 # We move the alias from the lateral's child node to the lateral itself 2846 table_alias = this.args["alias"].pop() 2847 else: 2848 table_alias = self._parse_table_alias() 2849 2850 return self.expression( 2851 exp.Lateral, 2852 this=this, 2853 view=view, 2854 outer=outer, 2855 alias=table_alias, 2856 cross_apply=cross_apply, 2857 ) 2858 2859 def _parse_join_parts( 2860 self, 2861 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2862 return ( 2863 self._match_set(self.JOIN_METHODS) and self._prev, 2864 self._match_set(self.JOIN_SIDES) and self._prev, 2865 self._match_set(self.JOIN_KINDS) and self._prev, 2866 ) 2867 2868 def _parse_join( 2869 self, skip_join_token: bool = False, 
parse_bracket: bool = False 2870 ) -> t.Optional[exp.Join]: 2871 if self._match(TokenType.COMMA): 2872 return self.expression(exp.Join, this=self._parse_table()) 2873 2874 index = self._index 2875 method, side, kind = self._parse_join_parts() 2876 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2877 join = self._match(TokenType.JOIN) 2878 2879 if not skip_join_token and not join: 2880 self._retreat(index) 2881 kind = None 2882 method = None 2883 side = None 2884 2885 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2886 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2887 2888 if not skip_join_token and not join and not outer_apply and not cross_apply: 2889 return None 2890 2891 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2892 2893 if method: 2894 kwargs["method"] = method.text 2895 if side: 2896 kwargs["side"] = side.text 2897 if kind: 2898 kwargs["kind"] = kind.text 2899 if hint: 2900 kwargs["hint"] = hint 2901 2902 if self._match(TokenType.ON): 2903 kwargs["on"] = self._parse_conjunction() 2904 elif self._match(TokenType.USING): 2905 kwargs["using"] = self._parse_wrapped_id_vars() 2906 elif not (kind and kind.token_type == TokenType.CROSS): 2907 index = self._index 2908 join = self._parse_join() 2909 2910 if join and self._match(TokenType.ON): 2911 kwargs["on"] = self._parse_conjunction() 2912 elif join and self._match(TokenType.USING): 2913 kwargs["using"] = self._parse_wrapped_id_vars() 2914 else: 2915 join = None 2916 self._retreat(index) 2917 2918 kwargs["this"].set("joins", [join] if join else None) 2919 2920 comments = [c for token in (method, side, kind) if token for c in token.comments] 2921 return self.expression(exp.Join, comments=comments, **kwargs) 2922 2923 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2924 this = self._parse_conjunction() 2925 2926 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2927 return this 2928 2929 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2930 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2931 2932 return this 2933 2934 def _parse_index_params(self) -> exp.IndexParameters: 2935 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2936 2937 if self._match(TokenType.L_PAREN, advance=False): 2938 columns = self._parse_wrapped_csv(self._parse_with_operator) 2939 else: 2940 columns = None 2941 2942 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 2943 partition_by = self._parse_partition_by() 2944 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 2945 tablespace = ( 2946 self._parse_var(any_token=True) 2947 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 2948 else None 2949 ) 2950 where = self._parse_where() 2951 2952 return self.expression( 2953 exp.IndexParameters, 2954 using=using, 2955 columns=columns, 2956 include=include, 2957 partition_by=partition_by, 2958 where=where, 2959 with_storage=with_storage, 2960 tablespace=tablespace, 2961 ) 2962 2963 def _parse_index( 2964 self, 2965 index: t.Optional[exp.Expression] = None, 2966 ) -> t.Optional[exp.Index]: 2967 if index: 2968 unique = None 2969 primary = None 2970 amp = None 2971 2972 self._match(TokenType.ON) 2973 self._match(TokenType.TABLE) # hive 2974 table = self._parse_table_parts(schema=True) 2975 else: 2976 unique = self._match(TokenType.UNIQUE) 2977 primary = self._match_text_seq("PRIMARY") 
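# The flags collected in this branch cover inline [UNIQUE] [PRIMARY] [AMP] INDEX <name>
# definitions, as opposed to the standalone CREATE INDEX ... ON <table> path handled
# above, where the caller passes the already-parsed index name in.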
2978 amp = self._match_text_seq("AMP") 2979 2980 if not self._match(TokenType.INDEX): 2981 return None 2982 2983 index = self._parse_id_var() 2984 table = None 2985 2986 params = self._parse_index_params() 2987 2988 return self.expression( 2989 exp.Index, 2990 this=index, 2991 table=table, 2992 unique=unique, 2993 primary=primary, 2994 amp=amp, 2995 params=params, 2996 ) 2997 2998 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2999 hints: t.List[exp.Expression] = [] 3000 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3001 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3002 hints.append( 3003 self.expression( 3004 exp.WithTableHint, 3005 expressions=self._parse_csv( 3006 lambda: self._parse_function() or self._parse_var(any_token=True) 3007 ), 3008 ) 3009 ) 3010 self._match_r_paren() 3011 else: 3012 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3013 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3014 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3015 3016 self._match_texts(("INDEX", "KEY")) 3017 if self._match(TokenType.FOR): 3018 hint.set("target", self._advance_any() and self._prev.text.upper()) 3019 3020 hint.set("expressions", self._parse_wrapped_id_vars()) 3021 hints.append(hint) 3022 3023 return hints or None 3024 3025 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3026 return ( 3027 (not schema and self._parse_function(optional_parens=False)) 3028 or self._parse_id_var(any_token=False) 3029 or self._parse_string_as_identifier() 3030 or self._parse_placeholder() 3031 ) 3032 3033 def _parse_table_parts( 3034 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3035 ) -> exp.Table: 3036 catalog = None 3037 db = None 3038 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3039 3040 while self._match(TokenType.DOT): 3041 if catalog: 3042 # This allows nesting the table in arbitrarily many dot expressions if needed 3043 table = self.expression( 3044 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3045 ) 3046 else: 3047 catalog = db 3048 db = table 3049 # "" used for tsql FROM a..b case 3050 table = self._parse_table_part(schema=schema) or "" 3051 3052 if ( 3053 wildcard 3054 and self._is_connected() 3055 and (isinstance(table, exp.Identifier) or not table) 3056 and self._match(TokenType.STAR) 3057 ): 3058 if isinstance(table, exp.Identifier): 3059 table.args["this"] += "*" 3060 else: 3061 table = exp.Identifier(this="*") 3062 3063 if is_db_reference: 3064 catalog = db 3065 db = table 3066 table = None 3067 3068 if not table and not is_db_reference: 3069 self.raise_error(f"Expected table name but got {self._curr}") 3070 if not db and is_db_reference: 3071 self.raise_error(f"Expected database name but got {self._curr}") 3072 3073 return self.expression( 3074 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 3075 ) 3076 3077 def _parse_table( 3078 self, 3079 schema: bool = False, 3080 joins: bool = False, 3081 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3082 parse_bracket: bool = False, 3083 is_db_reference: bool = False, 3084 ) -> t.Optional[exp.Expression]: 3085 lateral = self._parse_lateral() 3086 if lateral: 3087 return lateral 3088 3089 unnest = self._parse_unnest() 3090 if unnest: 3091 return unnest 3092 3093 values = self._parse_derived_table_values() 3094 if values: 3095 return values 3096 3097 subquery = 
self._parse_select(table=True) 3098 if subquery: 3099 if not subquery.args.get("pivots"): 3100 subquery.set("pivots", self._parse_pivots()) 3101 return subquery 3102 3103 bracket = parse_bracket and self._parse_bracket(None) 3104 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3105 3106 only = self._match(TokenType.ONLY) 3107 3108 this = t.cast( 3109 exp.Expression, 3110 bracket 3111 or self._parse_bracket( 3112 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3113 ), 3114 ) 3115 3116 if only: 3117 this.set("only", only) 3118 3119 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3120 self._match_text_seq("*") 3121 3122 if schema: 3123 return self._parse_schema(this=this) 3124 3125 version = self._parse_version() 3126 3127 if version: 3128 this.set("version", version) 3129 3130 if self.dialect.ALIAS_POST_TABLESAMPLE: 3131 table_sample = self._parse_table_sample() 3132 3133 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3134 if alias: 3135 this.set("alias", alias) 3136 3137 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3138 return self.expression( 3139 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3140 ) 3141 3142 this.set("hints", self._parse_table_hints()) 3143 3144 if not this.args.get("pivots"): 3145 this.set("pivots", self._parse_pivots()) 3146 3147 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3148 table_sample = self._parse_table_sample() 3149 3150 if table_sample: 3151 table_sample.set("this", this) 3152 this = table_sample 3153 3154 if joins: 3155 for join in iter(self._parse_join, None): 3156 this.append("joins", join) 3157 3158 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3159 this.set("ordinality", True) 3160 this.set("alias", self._parse_table_alias()) 3161 3162 return this 3163 3164 def _parse_version(self) -> t.Optional[exp.Version]: 3165 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3166 this = "TIMESTAMP" 3167 elif self._match(TokenType.VERSION_SNAPSHOT): 3168 this = "VERSION" 3169 else: 3170 return None 3171 3172 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3173 kind = self._prev.text.upper() 3174 start = self._parse_bitwise() 3175 self._match_texts(("TO", "AND")) 3176 end = self._parse_bitwise() 3177 expression: t.Optional[exp.Expression] = self.expression( 3178 exp.Tuple, expressions=[start, end] 3179 ) 3180 elif self._match_text_seq("CONTAINED", "IN"): 3181 kind = "CONTAINED IN" 3182 expression = self.expression( 3183 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3184 ) 3185 elif self._match(TokenType.ALL): 3186 kind = "ALL" 3187 expression = None 3188 else: 3189 self._match_text_seq("AS", "OF") 3190 kind = "AS OF" 3191 expression = self._parse_type() 3192 3193 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3194 3195 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3196 if not self._match(TokenType.UNNEST): 3197 return None 3198 3199 expressions = self._parse_wrapped_csv(self._parse_equality) 3200 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3201 3202 alias = self._parse_table_alias() if with_alias else None 3203 3204 if alias: 3205 if self.dialect.UNNEST_COLUMN_ONLY: 3206 if alias.args.get("columns"): 3207 self.raise_error("Unexpected extra column alias in unnest.") 3208 3209 alias.set("columns", [alias.this]) 3210 alias.set("this", None) 3211 3212 columns = 
alias.args.get("columns") or [] 3213 if offset and len(expressions) < len(columns): 3214 offset = columns.pop() 3215 3216 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3217 self._match(TokenType.ALIAS) 3218 offset = self._parse_id_var( 3219 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3220 ) or exp.to_identifier("offset") 3221 3222 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3223 3224 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3225 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3226 if not is_derived and not self._match_text_seq("VALUES"): 3227 return None 3228 3229 expressions = self._parse_csv(self._parse_value) 3230 alias = self._parse_table_alias() 3231 3232 if is_derived: 3233 self._match_r_paren() 3234 3235 return self.expression( 3236 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3237 ) 3238 3239 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3240 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3241 as_modifier and self._match_text_seq("USING", "SAMPLE") 3242 ): 3243 return None 3244 3245 bucket_numerator = None 3246 bucket_denominator = None 3247 bucket_field = None 3248 percent = None 3249 size = None 3250 seed = None 3251 3252 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3253 matched_l_paren = self._match(TokenType.L_PAREN) 3254 3255 if self.TABLESAMPLE_CSV: 3256 num = None 3257 expressions = self._parse_csv(self._parse_primary) 3258 else: 3259 expressions = None 3260 num = ( 3261 self._parse_factor() 3262 if self._match(TokenType.NUMBER, advance=False) 3263 else self._parse_primary() or self._parse_placeholder() 3264 ) 3265 3266 if self._match_text_seq("BUCKET"): 3267 bucket_numerator = self._parse_number() 3268 self._match_text_seq("OUT", "OF") 3269 bucket_denominator = bucket_denominator = self._parse_number() 3270 self._match(TokenType.ON) 3271 bucket_field = self._parse_field() 3272 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3273 percent = num 3274 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3275 size = num 3276 else: 3277 percent = num 3278 3279 if matched_l_paren: 3280 self._match_r_paren() 3281 3282 if self._match(TokenType.L_PAREN): 3283 method = self._parse_var(upper=True) 3284 seed = self._match(TokenType.COMMA) and self._parse_number() 3285 self._match_r_paren() 3286 elif self._match_texts(("SEED", "REPEATABLE")): 3287 seed = self._parse_wrapped(self._parse_number) 3288 3289 return self.expression( 3290 exp.TableSample, 3291 expressions=expressions, 3292 method=method, 3293 bucket_numerator=bucket_numerator, 3294 bucket_denominator=bucket_denominator, 3295 bucket_field=bucket_field, 3296 percent=percent, 3297 size=size, 3298 seed=seed, 3299 ) 3300 3301 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3302 return list(iter(self._parse_pivot, None)) or None 3303 3304 # https://duckdb.org/docs/sql/statements/pivot 3305 def _parse_simplified_pivot(self) -> exp.Pivot: 3306 def _parse_on() -> t.Optional[exp.Expression]: 3307 this = self._parse_bitwise() 3308 return self._parse_in(this) if self._match(TokenType.IN) else this 3309 3310 this = self._parse_table() 3311 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3312 using = self._match(TokenType.USING) and self._parse_csv( 3313 lambda: self._parse_alias(self._parse_function()) 3314 ) 3315 group = self._parse_group() 3316 
return self.expression( 3317 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3318 ) 3319 3320 def _parse_pivot_in(self) -> exp.In: 3321 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3322 this = self._parse_conjunction() 3323 3324 self._match(TokenType.ALIAS) 3325 alias = self._parse_field() 3326 if alias: 3327 return self.expression(exp.PivotAlias, this=this, alias=alias) 3328 3329 return this 3330 3331 value = self._parse_column() 3332 3333 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3334 self.raise_error("Expecting IN (") 3335 3336 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3337 3338 self._match_r_paren() 3339 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3340 3341 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3342 index = self._index 3343 include_nulls = None 3344 3345 if self._match(TokenType.PIVOT): 3346 unpivot = False 3347 elif self._match(TokenType.UNPIVOT): 3348 unpivot = True 3349 3350 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3351 if self._match_text_seq("INCLUDE", "NULLS"): 3352 include_nulls = True 3353 elif self._match_text_seq("EXCLUDE", "NULLS"): 3354 include_nulls = False 3355 else: 3356 return None 3357 3358 expressions = [] 3359 3360 if not self._match(TokenType.L_PAREN): 3361 self._retreat(index) 3362 return None 3363 3364 if unpivot: 3365 expressions = self._parse_csv(self._parse_column) 3366 else: 3367 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3368 3369 if not expressions: 3370 self.raise_error("Failed to parse PIVOT's aggregation list") 3371 3372 if not self._match(TokenType.FOR): 3373 self.raise_error("Expecting FOR") 3374 3375 field = self._parse_pivot_in() 3376 3377 self._match_r_paren() 3378 3379 pivot = self.expression( 3380 exp.Pivot, 3381 expressions=expressions, 3382 field=field, 3383 unpivot=unpivot, 3384 include_nulls=include_nulls, 3385 ) 3386 3387 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3388 pivot.set("alias", self._parse_table_alias()) 3389 3390 if not unpivot: 3391 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3392 3393 columns: t.List[exp.Expression] = [] 3394 for fld in pivot.args["field"].expressions: 3395 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3396 for name in names: 3397 if self.PREFIXED_PIVOT_COLUMNS: 3398 name = f"{name}_{field_name}" if name else field_name 3399 else: 3400 name = f"{field_name}_{name}" if name else field_name 3401 3402 columns.append(exp.to_identifier(name)) 3403 3404 pivot.set("columns", columns) 3405 3406 return pivot 3407 3408 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3409 return [agg.alias for agg in aggregations] 3410 3411 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3412 if not skip_where_token and not self._match(TokenType.PREWHERE): 3413 return None 3414 3415 return self.expression( 3416 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3417 ) 3418 3419 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3420 if not skip_where_token and not self._match(TokenType.WHERE): 3421 return None 3422 3423 return self.expression( 3424 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3425 ) 3426 3427 def _parse_group(self, skip_group_by_token: bool = False) -> 
t.Optional[exp.Group]: 3428 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3429 return None 3430 3431 elements = defaultdict(list) 3432 3433 if self._match(TokenType.ALL): 3434 return self.expression(exp.Group, all=True) 3435 3436 while True: 3437 expressions = self._parse_csv(self._parse_conjunction) 3438 if expressions: 3439 elements["expressions"].extend(expressions) 3440 3441 grouping_sets = self._parse_grouping_sets() 3442 if grouping_sets: 3443 elements["grouping_sets"].extend(grouping_sets) 3444 3445 rollup = None 3446 cube = None 3447 totals = None 3448 3449 index = self._index 3450 with_ = self._match(TokenType.WITH) 3451 if self._match(TokenType.ROLLUP): 3452 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3453 elements["rollup"].extend(ensure_list(rollup)) 3454 3455 if self._match(TokenType.CUBE): 3456 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3457 elements["cube"].extend(ensure_list(cube)) 3458 3459 if self._match_text_seq("TOTALS"): 3460 totals = True 3461 elements["totals"] = True # type: ignore 3462 3463 if not (grouping_sets or rollup or cube or totals): 3464 if with_: 3465 self._retreat(index) 3466 break 3467 3468 return self.expression(exp.Group, **elements) # type: ignore 3469 3470 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3471 if not self._match(TokenType.GROUPING_SETS): 3472 return None 3473 3474 return self._parse_wrapped_csv(self._parse_grouping_set) 3475 3476 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3477 if self._match(TokenType.L_PAREN): 3478 grouping_set = self._parse_csv(self._parse_column) 3479 self._match_r_paren() 3480 return self.expression(exp.Tuple, expressions=grouping_set) 3481 3482 return self._parse_column() 3483 3484 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3485 if not skip_having_token and not self._match(TokenType.HAVING): 3486 return None 3487 return self.expression(exp.Having, this=self._parse_conjunction()) 3488 3489 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3490 if not self._match(TokenType.QUALIFY): 3491 return None 3492 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3493 3494 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3495 if skip_start_token: 3496 start = None 3497 elif self._match(TokenType.START_WITH): 3498 start = self._parse_conjunction() 3499 else: 3500 return None 3501 3502 self._match(TokenType.CONNECT_BY) 3503 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3504 exp.Prior, this=self._parse_bitwise() 3505 ) 3506 connect = self._parse_conjunction() 3507 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3508 3509 if not start and self._match(TokenType.START_WITH): 3510 start = self._parse_conjunction() 3511 3512 return self.expression(exp.Connect, start=start, connect=connect) 3513 3514 def _parse_name_as_expression(self) -> exp.Alias: 3515 return self.expression( 3516 exp.Alias, 3517 alias=self._parse_id_var(any_token=True), 3518 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3519 ) 3520 3521 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3522 if self._match_text_seq("INTERPOLATE"): 3523 return self._parse_wrapped_csv(self._parse_name_as_expression) 3524 return None 3525 3526 def _parse_order( 3527 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3528 ) -> t.Optional[exp.Expression]: 3529 siblings = None 3530 if not skip_order_token and 
not self._match(TokenType.ORDER_BY): 3531 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3532 return this 3533 3534 siblings = True 3535 3536 return self.expression( 3537 exp.Order, 3538 this=this, 3539 expressions=self._parse_csv(self._parse_ordered), 3540 interpolate=self._parse_interpolate(), 3541 siblings=siblings, 3542 ) 3543 3544 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3545 if not self._match(token): 3546 return None 3547 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3548 3549 def _parse_ordered( 3550 self, parse_method: t.Optional[t.Callable] = None 3551 ) -> t.Optional[exp.Ordered]: 3552 this = parse_method() if parse_method else self._parse_conjunction() 3553 if not this: 3554 return None 3555 3556 asc = self._match(TokenType.ASC) 3557 desc = self._match(TokenType.DESC) or (asc and False) 3558 3559 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3560 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3561 3562 nulls_first = is_nulls_first or False 3563 explicitly_null_ordered = is_nulls_first or is_nulls_last 3564 3565 if ( 3566 not explicitly_null_ordered 3567 and ( 3568 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3569 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3570 ) 3571 and self.dialect.NULL_ORDERING != "nulls_are_last" 3572 ): 3573 nulls_first = True 3574 3575 if self._match_text_seq("WITH", "FILL"): 3576 with_fill = self.expression( 3577 exp.WithFill, 3578 **{ # type: ignore 3579 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3580 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3581 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3582 }, 3583 ) 3584 else: 3585 with_fill = None 3586 3587 return self.expression( 3588 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3589 ) 3590 3591 def _parse_limit( 3592 self, 3593 this: t.Optional[exp.Expression] = None, 3594 top: bool = False, 3595 skip_limit_token: bool = False, 3596 ) -> t.Optional[exp.Expression]: 3597 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3598 comments = self._prev_comments 3599 if top: 3600 limit_paren = self._match(TokenType.L_PAREN) 3601 expression = self._parse_term() if limit_paren else self._parse_number() 3602 3603 if limit_paren: 3604 self._match_r_paren() 3605 else: 3606 expression = self._parse_term() 3607 3608 if self._match(TokenType.COMMA): 3609 offset = expression 3610 expression = self._parse_term() 3611 else: 3612 offset = None 3613 3614 limit_exp = self.expression( 3615 exp.Limit, 3616 this=this, 3617 expression=expression, 3618 offset=offset, 3619 comments=comments, 3620 expressions=self._parse_limit_by(), 3621 ) 3622 3623 return limit_exp 3624 3625 if self._match(TokenType.FETCH): 3626 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3627 direction = self._prev.text.upper() if direction else "FIRST" 3628 3629 count = self._parse_field(tokens=self.FETCH_TOKENS) 3630 percent = self._match(TokenType.PERCENT) 3631 3632 self._match_set((TokenType.ROW, TokenType.ROWS)) 3633 3634 only = self._match_text_seq("ONLY") 3635 with_ties = self._match_text_seq("WITH", "TIES") 3636 3637 if only and with_ties: 3638 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3639 3640 return self.expression( 3641 exp.Fetch, 3642 direction=direction, 3643 count=count, 3644 percent=percent, 3645 with_ties=with_ties, 3646 ) 3647 3648 return this 3649 
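    # A minimal sketch (illustrative, assuming the default dialect) of what
    # _parse_limit yields for MySQL's "LIMIT offset, count" shorthand: the
    # leading term is shifted into the offset slot, per the COMMA branch above.
    #
    #     >>> import sqlglot
    #     >>> limit = sqlglot.parse_one("SELECT x FROM t LIMIT 2, 5").args["limit"]
    #     >>> limit.expression.sql(), limit.args["offset"].sql()
    #     ('5', '2')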
3650 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3651 if not self._match(TokenType.OFFSET): 3652 return this 3653 3654 count = self._parse_term() 3655 self._match_set((TokenType.ROW, TokenType.ROWS)) 3656 3657 return self.expression( 3658 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3659 ) 3660 3661 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3662 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3663 3664 def _parse_locks(self) -> t.List[exp.Lock]: 3665 locks = [] 3666 while True: 3667 if self._match_text_seq("FOR", "UPDATE"): 3668 update = True 3669 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3670 "LOCK", "IN", "SHARE", "MODE" 3671 ): 3672 update = False 3673 else: 3674 break 3675 3676 expressions = None 3677 if self._match_text_seq("OF"): 3678 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3679 3680 wait: t.Optional[bool | exp.Expression] = None 3681 if self._match_text_seq("NOWAIT"): 3682 wait = True 3683 elif self._match_text_seq("WAIT"): 3684 wait = self._parse_primary() 3685 elif self._match_text_seq("SKIP", "LOCKED"): 3686 wait = False 3687 3688 locks.append( 3689 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3690 ) 3691 3692 return locks 3693 3694 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3695 while this and self._match_set(self.SET_OPERATIONS): 3696 token_type = self._prev.token_type 3697 3698 if token_type == TokenType.UNION: 3699 operation = exp.Union 3700 elif token_type == TokenType.EXCEPT: 3701 operation = exp.Except 3702 else: 3703 operation = exp.Intersect 3704 3705 comments = self._prev.comments 3706 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3707 by_name = self._match_text_seq("BY", "NAME") 3708 expression = self._parse_select(nested=True, parse_set_operation=False) 3709 3710 this = self.expression( 3711 operation, 3712 comments=comments, 3713 this=this, 3714 distinct=distinct, 3715 by_name=by_name, 3716 expression=expression, 3717 ) 3718 3719 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3720 expression = this.expression 3721 3722 if expression: 3723 for arg in self.UNION_MODIFIERS: 3724 expr = expression.args.get(arg) 3725 if expr: 3726 this.set(arg, expr.pop()) 3727 3728 return this 3729 3730 def _parse_expression(self) -> t.Optional[exp.Expression]: 3731 return self._parse_alias(self._parse_conjunction()) 3732 3733 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3734 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3735 3736 def _parse_equality(self) -> t.Optional[exp.Expression]: 3737 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3738 3739 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3740 return self._parse_tokens(self._parse_range, self.COMPARISON) 3741 3742 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3743 this = this or self._parse_bitwise() 3744 negate = self._match(TokenType.NOT) 3745 3746 if self._match_set(self.RANGE_PARSERS): 3747 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3748 if not expression: 3749 return this 3750 3751 this = expression 3752 elif self._match(TokenType.ISNULL): 3753 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3754 3755 # Postgres supports ISNULL and NOTNULL for conditions. 
3756 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3757 if self._match(TokenType.NOTNULL): 3758 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3759 this = self.expression(exp.Not, this=this) 3760 3761 if negate: 3762 this = self.expression(exp.Not, this=this) 3763 3764 if self._match(TokenType.IS): 3765 this = self._parse_is(this) 3766 3767 return this 3768 3769 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3770 index = self._index - 1 3771 negate = self._match(TokenType.NOT) 3772 3773 if self._match_text_seq("DISTINCT", "FROM"): 3774 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3775 return self.expression(klass, this=this, expression=self._parse_bitwise()) 3776 3777 expression = self._parse_null() or self._parse_boolean() 3778 if not expression: 3779 self._retreat(index) 3780 return None 3781 3782 this = self.expression(exp.Is, this=this, expression=expression) 3783 return self.expression(exp.Not, this=this) if negate else this 3784 3785 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3786 unnest = self._parse_unnest(with_alias=False) 3787 if unnest: 3788 this = self.expression(exp.In, this=this, unnest=unnest) 3789 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3790 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3791 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3792 3793 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 3794 this = self.expression(exp.In, this=this, query=expressions[0]) 3795 else: 3796 this = self.expression(exp.In, this=this, expressions=expressions) 3797 3798 if matched_l_paren: 3799 self._match_r_paren(this) 3800 elif not self._match(TokenType.R_BRACKET, expression=this): 3801 self.raise_error("Expecting ]") 3802 else: 3803 this = self.expression(exp.In, this=this, field=self._parse_field()) 3804 3805 return this 3806 3807 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3808 low = self._parse_bitwise() 3809 self._match(TokenType.AND) 3810 high = self._parse_bitwise() 3811 return self.expression(exp.Between, this=this, low=low, high=high) 3812 3813 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3814 if not self._match(TokenType.ESCAPE): 3815 return this 3816 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3817 3818 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3819 index = self._index 3820 3821 if not self._match(TokenType.INTERVAL) and match_interval: 3822 return None 3823 3824 if self._match(TokenType.STRING, advance=False): 3825 this = self._parse_primary() 3826 else: 3827 this = self._parse_term() 3828 3829 if not this or ( 3830 isinstance(this, exp.Column) 3831 and not this.table 3832 and not this.this.quoted 3833 and this.name.upper() == "IS" 3834 ): 3835 self._retreat(index) 3836 return None 3837 3838 unit = self._parse_function() or ( 3839 not self._match(TokenType.ALIAS, advance=False) 3840 and self._parse_var(any_token=True, upper=True) 3841 ) 3842 3843 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3844 # each INTERVAL expression into this canonical form so it's easy to transpile 3845 if this and this.is_number: 3846 this = exp.Literal.string(this.name) 3847 elif this and this.is_string: 3848 parts = this.name.split() 3849 3850 if len(parts) == 2: 3851 if unit: 3852 # This is not 
actually a unit, it's something else (e.g. a "window side") 3853 unit = None 3854 self._retreat(self._index - 1) 3855 3856 this = exp.Literal.string(parts[0]) 3857 unit = self.expression(exp.Var, this=parts[1].upper()) 3858 3859 return self.expression(exp.Interval, this=this, unit=unit) 3860 3861 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3862 this = self._parse_term() 3863 3864 while True: 3865 if self._match_set(self.BITWISE): 3866 this = self.expression( 3867 self.BITWISE[self._prev.token_type], 3868 this=this, 3869 expression=self._parse_term(), 3870 ) 3871 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3872 this = self.expression( 3873 exp.DPipe, 3874 this=this, 3875 expression=self._parse_term(), 3876 safe=not self.dialect.STRICT_STRING_CONCAT, 3877 ) 3878 elif self._match(TokenType.DQMARK): 3879 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3880 elif self._match_pair(TokenType.LT, TokenType.LT): 3881 this = self.expression( 3882 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3883 ) 3884 elif self._match_pair(TokenType.GT, TokenType.GT): 3885 this = self.expression( 3886 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3887 ) 3888 else: 3889 break 3890 3891 return this 3892 3893 def _parse_term(self) -> t.Optional[exp.Expression]: 3894 return self._parse_tokens(self._parse_factor, self.TERM) 3895 3896 def _parse_factor(self) -> t.Optional[exp.Expression]: 3897 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3898 this = parse_method() 3899 3900 while self._match_set(self.FACTOR): 3901 this = self.expression( 3902 self.FACTOR[self._prev.token_type], 3903 this=this, 3904 comments=self._prev_comments, 3905 expression=parse_method(), 3906 ) 3907 if isinstance(this, exp.Div): 3908 this.args["typed"] = self.dialect.TYPED_DIVISION 3909 this.args["safe"] = self.dialect.SAFE_DIVISION 3910 3911 return this 3912 3913 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3914 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3915 3916 def _parse_unary(self) -> t.Optional[exp.Expression]: 3917 if self._match_set(self.UNARY_PARSERS): 3918 return self.UNARY_PARSERS[self._prev.token_type](self) 3919 return self._parse_at_time_zone(self._parse_type()) 3920 3921 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3922 interval = parse_interval and self._parse_interval() 3923 if interval: 3924 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 3925 while True: 3926 index = self._index 3927 self._match(TokenType.PLUS) 3928 3929 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 3930 self._retreat(index) 3931 break 3932 3933 interval = self.expression( # type: ignore 3934 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 3935 ) 3936 3937 return interval 3938 3939 index = self._index 3940 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3941 this = self._parse_column() 3942 3943 if data_type: 3944 if isinstance(this, exp.Literal): 3945 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3946 if parser: 3947 return parser(self, this, data_type) 3948 return self.expression(exp.Cast, this=this, to=data_type) 3949 if not data_type.expressions: 3950 self._retreat(index) 3951 return self._parse_column() 3952 return self._parse_column_ops(data_type) 3953 3954 return this and self._parse_column_ops(this) 3955 3956 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3957 this = self._parse_type() 3958 if not this: 3959 return None 3960 3961 if isinstance(this, exp.Column) and not this.table: 3962 this = exp.var(this.name.upper()) 3963 3964 return self.expression( 3965 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3966 ) 3967 3968 def _parse_types( 3969 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3970 ) -> t.Optional[exp.Expression]: 3971 index = self._index 3972 3973 prefix = self._match_text_seq("SYSUDTLIB", ".") 3974 3975 if not self._match_set(self.TYPE_TOKENS): 3976 identifier = allow_identifiers and self._parse_id_var( 3977 any_token=False, tokens=(TokenType.VAR,) 3978 ) 3979 if identifier: 3980 tokens = self.dialect.tokenize(identifier.name) 3981 3982 if len(tokens) != 1: 3983 self.raise_error("Unexpected identifier", self._prev) 3984 3985 if tokens[0].token_type in self.TYPE_TOKENS: 3986 self._prev = tokens[0] 3987 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 3988 type_name = identifier.name 3989 3990 while self._match(TokenType.DOT): 3991 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3992 3993 return exp.DataType.build(type_name, udt=True) 3994 else: 3995 self._retreat(self._index - 1) 3996 return None 3997 else: 3998 return None 3999 4000 type_token = self._prev.token_type 4001 4002 if type_token == TokenType.PSEUDO_TYPE: 4003 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4004 4005 if type_token == TokenType.OBJECT_IDENTIFIER: 4006 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4007 4008 nested = type_token in self.NESTED_TYPE_TOKENS 4009 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4010 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4011 expressions = None 4012 maybe_func = False 4013 4014 if self._match(TokenType.L_PAREN): 4015 if is_struct: 4016 expressions = self._parse_csv(self._parse_struct_types) 4017 elif nested: 4018 expressions = self._parse_csv( 4019 lambda: self._parse_types( 4020 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4021 ) 4022 ) 4023 elif type_token in self.ENUM_TYPE_TOKENS: 4024 expressions = self._parse_csv(self._parse_equality) 4025 elif is_aggregate: 4026 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4027 any_token=False, tokens=(TokenType.VAR,) 4028 ) 4029 if not func_or_ident or not self._match(TokenType.COMMA): 4030 return None 4031 expressions = 
self._parse_csv( 4032 lambda: self._parse_types( 4033 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4034 ) 4035 ) 4036 expressions.insert(0, func_or_ident) 4037 else: 4038 expressions = self._parse_csv(self._parse_type_size) 4039 4040 if not expressions or not self._match(TokenType.R_PAREN): 4041 self._retreat(index) 4042 return None 4043 4044 maybe_func = True 4045 4046 this: t.Optional[exp.Expression] = None 4047 values: t.Optional[t.List[exp.Expression]] = None 4048 4049 if nested and self._match(TokenType.LT): 4050 if is_struct: 4051 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4052 else: 4053 expressions = self._parse_csv( 4054 lambda: self._parse_types( 4055 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4056 ) 4057 ) 4058 4059 if not self._match(TokenType.GT): 4060 self.raise_error("Expecting >") 4061 4062 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4063 values = self._parse_csv(self._parse_conjunction) 4064 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4065 4066 if type_token in self.TIMESTAMPS: 4067 if self._match_text_seq("WITH", "TIME", "ZONE"): 4068 maybe_func = False 4069 tz_type = ( 4070 exp.DataType.Type.TIMETZ 4071 if type_token in self.TIMES 4072 else exp.DataType.Type.TIMESTAMPTZ 4073 ) 4074 this = exp.DataType(this=tz_type, expressions=expressions) 4075 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4076 maybe_func = False 4077 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4078 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4079 maybe_func = False 4080 elif type_token == TokenType.INTERVAL: 4081 unit = self._parse_var() 4082 4083 if self._match_text_seq("TO"): 4084 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 4085 else: 4086 span = None 4087 4088 if span or not unit: 4089 this = self.expression( 4090 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 4091 ) 4092 else: 4093 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4094 4095 if maybe_func and check_func: 4096 index2 = self._index 4097 peek = self._parse_string() 4098 4099 if not peek: 4100 self._retreat(index) 4101 return None 4102 4103 self._retreat(index2) 4104 4105 if not this: 4106 if self._match_text_seq("UNSIGNED"): 4107 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4108 if not unsigned_type_token: 4109 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4110 4111 type_token = unsigned_type_token or type_token 4112 4113 this = exp.DataType( 4114 this=exp.DataType.Type[type_token.value], 4115 expressions=expressions, 4116 nested=nested, 4117 values=values, 4118 prefix=prefix, 4119 ) 4120 4121 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 4122 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 4123 4124 return this 4125 4126 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4127 index = self._index 4128 this = self._parse_type(parse_interval=False) or self._parse_id_var() 4129 self._match(TokenType.COLON) 4130 column_def = self._parse_column_def(this) 4131 4132 if type_required and ( 4133 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 4134 ): 4135 self._retreat(index) 4136 return self._parse_types() 4137 4138 return column_def 4139 4140 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) 
-> t.Optional[exp.Expression]: 4141 if not self._match_text_seq("AT", "TIME", "ZONE"): 4142 return this 4143 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4144 4145 def _parse_column(self) -> t.Optional[exp.Expression]: 4146 this = self._parse_column_reference() 4147 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4148 4149 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4150 this = self._parse_field() 4151 if ( 4152 not this 4153 and self._match(TokenType.VALUES, advance=False) 4154 and self.VALUES_FOLLOWED_BY_PAREN 4155 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4156 ): 4157 this = self._parse_id_var() 4158 4159 return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this 4160 4161 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4162 this = self._parse_bracket(this) 4163 4164 while self._match_set(self.COLUMN_OPERATORS): 4165 op_token = self._prev.token_type 4166 op = self.COLUMN_OPERATORS.get(op_token) 4167 4168 if op_token == TokenType.DCOLON: 4169 field = self._parse_types() 4170 if not field: 4171 self.raise_error("Expected type") 4172 elif op and self._curr: 4173 field = self._parse_column_reference() 4174 else: 4175 field = self._parse_field(anonymous_func=True, any_token=True) 4176 4177 if isinstance(field, exp.Func) and this: 4178 # bigquery allows function calls like x.y.count(...) 4179 # SAFE.SUBSTR(...) 4180 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4181 this = exp.replace_tree( 4182 this, 4183 lambda n: ( 4184 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4185 if n.table 4186 else n.this 4187 ) 4188 if isinstance(n, exp.Column) 4189 else n, 4190 ) 4191 4192 if op: 4193 this = op(self, this, field) 4194 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4195 this = self.expression( 4196 exp.Column, 4197 this=field, 4198 table=this.this, 4199 db=this.args.get("table"), 4200 catalog=this.args.get("db"), 4201 ) 4202 else: 4203 this = self.expression(exp.Dot, this=this, expression=field) 4204 this = self._parse_bracket(this) 4205 return this 4206 4207 def _parse_primary(self) -> t.Optional[exp.Expression]: 4208 if self._match_set(self.PRIMARY_PARSERS): 4209 token_type = self._prev.token_type 4210 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4211 4212 if token_type == TokenType.STRING: 4213 expressions = [primary] 4214 while self._match(TokenType.STRING): 4215 expressions.append(exp.Literal.string(self._prev.text)) 4216 4217 if len(expressions) > 1: 4218 return self.expression(exp.Concat, expressions=expressions) 4219 4220 return primary 4221 4222 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4223 return exp.Literal.number(f"0.{self._prev.text}") 4224 4225 if self._match(TokenType.L_PAREN): 4226 comments = self._prev_comments 4227 query = self._parse_select() 4228 4229 if query: 4230 expressions = [query] 4231 else: 4232 expressions = self._parse_expressions() 4233 4234 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4235 4236 if isinstance(this, exp.UNWRAPPED_QUERIES): 4237 this = self._parse_set_operations( 4238 self._parse_subquery(this=this, parse_alias=False) 4239 ) 4240 elif isinstance(this, exp.Subquery): 4241 this = self._parse_subquery( 4242 this=self._parse_set_operations(this), parse_alias=False 4243 ) 4244 elif len(expressions) > 1: 4245 this = 
self.expression(exp.Tuple, expressions=expressions) 4246 else: 4247 this = self.expression(exp.Paren, this=this) 4248 4249 if this: 4250 this.add_comments(comments) 4251 4252 self._match_r_paren(expression=this) 4253 return this 4254 4255 return None 4256 4257 def _parse_field( 4258 self, 4259 any_token: bool = False, 4260 tokens: t.Optional[t.Collection[TokenType]] = None, 4261 anonymous_func: bool = False, 4262 ) -> t.Optional[exp.Expression]: 4263 return ( 4264 self._parse_primary() 4265 or self._parse_function(anonymous=anonymous_func) 4266 or self._parse_id_var(any_token=any_token, tokens=tokens) 4267 ) 4268 4269 def _parse_function( 4270 self, 4271 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4272 anonymous: bool = False, 4273 optional_parens: bool = True, 4274 ) -> t.Optional[exp.Expression]: 4275 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4276 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4277 fn_syntax = False 4278 if ( 4279 self._match(TokenType.L_BRACE, advance=False) 4280 and self._next 4281 and self._next.text.upper() == "FN" 4282 ): 4283 self._advance(2) 4284 fn_syntax = True 4285 4286 func = self._parse_function_call( 4287 functions=functions, anonymous=anonymous, optional_parens=optional_parens 4288 ) 4289 4290 if fn_syntax: 4291 self._match(TokenType.R_BRACE) 4292 4293 return func 4294 4295 def _parse_function_call( 4296 self, 4297 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4298 anonymous: bool = False, 4299 optional_parens: bool = True, 4300 ) -> t.Optional[exp.Expression]: 4301 if not self._curr: 4302 return None 4303 4304 comments = self._curr.comments 4305 token_type = self._curr.token_type 4306 this = self._curr.text 4307 upper = this.upper() 4308 4309 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4310 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4311 self._advance() 4312 return self._parse_window(parser(self)) 4313 4314 if not self._next or self._next.token_type != TokenType.L_PAREN: 4315 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4316 self._advance() 4317 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4318 4319 return None 4320 4321 if token_type not in self.FUNC_TOKENS: 4322 return None 4323 4324 self._advance(2) 4325 4326 parser = self.FUNCTION_PARSERS.get(upper) 4327 if parser and not anonymous: 4328 this = parser(self) 4329 else: 4330 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4331 4332 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4333 this = self.expression(subquery_predicate, this=self._parse_select()) 4334 self._match_r_paren() 4335 return this 4336 4337 if functions is None: 4338 functions = self.FUNCTIONS 4339 4340 function = functions.get(upper) 4341 4342 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4343 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4344 4345 if alias: 4346 args = self._kv_to_prop_eq(args) 4347 4348 if function and not anonymous: 4349 if "dialect" in function.__code__.co_varnames: 4350 func = function(args, dialect=self.dialect) 4351 else: 4352 func = function(args) 4353 4354 func = self.validate_expression(func, args) 4355 if not self.dialect.NORMALIZE_FUNCTIONS: 4356 func.meta["name"] = this 4357 4358 this = func 4359 else: 4360 if token_type == TokenType.IDENTIFIER: 4361 this = exp.Identifier(this=this, quoted=True) 4362 this = self.expression(exp.Anonymous, this=this, expressions=args) 4363 
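        # Note (illustrative): builders registered in FUNCTIONS may optionally accept
        # the active dialect; the __code__.co_varnames check above inspects the
        # builder's parameters and passes dialect= only when it is declared, e.g.:
        #
        #     >>> "dialect" in (lambda args, dialect: None).__code__.co_varnames
        #     True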
4364 if isinstance(this, exp.Expression): 4365 this.add_comments(comments) 4366 4367 self._match_r_paren(this) 4368 return self._parse_window(this) 4369 4370 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4371 transformed = [] 4372 4373 for e in expressions: 4374 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4375 if isinstance(e, exp.Alias): 4376 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4377 4378 if not isinstance(e, exp.PropertyEQ): 4379 e = self.expression( 4380 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4381 ) 4382 4383 if isinstance(e.this, exp.Column): 4384 e.this.replace(e.this.this) 4385 4386 transformed.append(e) 4387 4388 return transformed 4389 4390 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4391 return self._parse_column_def(self._parse_id_var()) 4392 4393 def _parse_user_defined_function( 4394 self, kind: t.Optional[TokenType] = None 4395 ) -> t.Optional[exp.Expression]: 4396 this = self._parse_id_var() 4397 4398 while self._match(TokenType.DOT): 4399 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4400 4401 if not self._match(TokenType.L_PAREN): 4402 return this 4403 4404 expressions = self._parse_csv(self._parse_function_parameter) 4405 self._match_r_paren() 4406 return self.expression( 4407 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4408 ) 4409 4410 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4411 literal = self._parse_primary() 4412 if literal: 4413 return self.expression(exp.Introducer, this=token.text, expression=literal) 4414 4415 return self.expression(exp.Identifier, this=token.text) 4416 4417 def _parse_session_parameter(self) -> exp.SessionParameter: 4418 kind = None 4419 this = self._parse_id_var() or self._parse_primary() 4420 4421 if this and self._match(TokenType.DOT): 4422 kind = this.name 4423 this = self._parse_var() or self._parse_primary() 4424 4425 return self.expression(exp.SessionParameter, this=this, kind=kind) 4426 4427 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4428 index = self._index 4429 4430 if self._match(TokenType.L_PAREN): 4431 expressions = t.cast( 4432 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4433 ) 4434 4435 if not self._match(TokenType.R_PAREN): 4436 self._retreat(index) 4437 else: 4438 expressions = [self._parse_id_var()] 4439 4440 if self._match_set(self.LAMBDAS): 4441 return self.LAMBDAS[self._prev.token_type](self, expressions) 4442 4443 self._retreat(index) 4444 4445 this: t.Optional[exp.Expression] 4446 4447 if self._match(TokenType.DISTINCT): 4448 this = self.expression( 4449 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4450 ) 4451 else: 4452 this = self._parse_select_or_expression(alias=alias) 4453 4454 return self._parse_limit( 4455 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4456 ) 4457 4458 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4459 index = self._index 4460 4461 if not self._match(TokenType.L_PAREN): 4462 return this 4463 4464 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 4465 # expr can be of both types 4466 if self._match_set(self.SELECT_START_TOKENS): 4467 self._retreat(index) 4468 return this 4469 4470 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4471 4472 self._match_r_paren() 4473 return self.expression(exp.Schema, this=this, expressions=args) 4474 4475 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4476 return self._parse_column_def(self._parse_field(any_token=True)) 4477 4478 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4479 # column defs are not really columns, they're identifiers 4480 if isinstance(this, exp.Column): 4481 this = this.this 4482 4483 kind = self._parse_types(schema=True) 4484 4485 if self._match_text_seq("FOR", "ORDINALITY"): 4486 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4487 4488 constraints: t.List[exp.Expression] = [] 4489 4490 if not kind and self._match(TokenType.ALIAS): 4491 constraints.append( 4492 self.expression( 4493 exp.ComputedColumnConstraint, 4494 this=self._parse_conjunction(), 4495 persisted=self._match_text_seq("PERSISTED"), 4496 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4497 ) 4498 ) 4499 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4500 self._match(TokenType.ALIAS) 4501 constraints.append( 4502 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4503 ) 4504 4505 while True: 4506 constraint = self._parse_column_constraint() 4507 if not constraint: 4508 break 4509 constraints.append(constraint) 4510 4511 if not kind and not constraints: 4512 return this 4513 4514 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4515 4516 def _parse_auto_increment( 4517 self, 4518 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4519 start = None 4520 increment = None 4521 4522 if self._match(TokenType.L_PAREN, advance=False): 4523 args = self._parse_wrapped_csv(self._parse_bitwise) 4524 start = seq_get(args, 0) 4525 increment = seq_get(args, 1) 4526 elif self._match_text_seq("START"): 4527 start = self._parse_bitwise() 4528 self._match_text_seq("INCREMENT") 4529 increment = self._parse_bitwise() 4530 4531 if start and increment: 4532 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4533 4534 return exp.AutoIncrementColumnConstraint() 4535 4536 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4537 if not self._match_text_seq("REFRESH"): 4538 self._retreat(self._index - 1) 4539 return None 4540 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4541 4542 def _parse_compress(self) -> exp.CompressColumnConstraint: 4543 if self._match(TokenType.L_PAREN, advance=False): 4544 return self.expression( 4545 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4546 ) 4547 4548 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4549 4550 def _parse_generated_as_identity( 4551 self, 4552 ) -> ( 4553 exp.GeneratedAsIdentityColumnConstraint 4554 | exp.ComputedColumnConstraint 4555 | exp.GeneratedAsRowColumnConstraint 4556 ): 4557 if self._match_text_seq("BY", "DEFAULT"): 4558 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4559 this = self.expression( 4560 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4561 ) 4562 else: 4563 self._match_text_seq("ALWAYS") 4564 this = 
self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4565 4566 self._match(TokenType.ALIAS) 4567 4568 if self._match_text_seq("ROW"): 4569 start = self._match_text_seq("START") 4570 if not start: 4571 self._match(TokenType.END) 4572 hidden = self._match_text_seq("HIDDEN") 4573 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4574 4575 identity = self._match_text_seq("IDENTITY") 4576 4577 if self._match(TokenType.L_PAREN): 4578 if self._match(TokenType.START_WITH): 4579 this.set("start", self._parse_bitwise()) 4580 if self._match_text_seq("INCREMENT", "BY"): 4581 this.set("increment", self._parse_bitwise()) 4582 if self._match_text_seq("MINVALUE"): 4583 this.set("minvalue", self._parse_bitwise()) 4584 if self._match_text_seq("MAXVALUE"): 4585 this.set("maxvalue", self._parse_bitwise()) 4586 4587 if self._match_text_seq("CYCLE"): 4588 this.set("cycle", True) 4589 elif self._match_text_seq("NO", "CYCLE"): 4590 this.set("cycle", False) 4591 4592 if not identity: 4593 this.set("expression", self._parse_bitwise()) 4594 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4595 args = self._parse_csv(self._parse_bitwise) 4596 this.set("start", seq_get(args, 0)) 4597 this.set("increment", seq_get(args, 1)) 4598 4599 self._match_r_paren() 4600 4601 return this 4602 4603 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4604 self._match_text_seq("LENGTH") 4605 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4606 4607 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4608 if self._match_text_seq("NULL"): 4609 return self.expression(exp.NotNullColumnConstraint) 4610 if self._match_text_seq("CASESPECIFIC"): 4611 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4612 if self._match_text_seq("FOR", "REPLICATION"): 4613 return self.expression(exp.NotForReplicationColumnConstraint) 4614 return None 4615 4616 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4617 if self._match(TokenType.CONSTRAINT): 4618 this = self._parse_id_var() 4619 else: 4620 this = None 4621 4622 if self._match_texts(self.CONSTRAINT_PARSERS): 4623 return self.expression( 4624 exp.ColumnConstraint, 4625 this=this, 4626 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4627 ) 4628 4629 return this 4630 4631 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4632 if not self._match(TokenType.CONSTRAINT): 4633 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4634 4635 return self.expression( 4636 exp.Constraint, 4637 this=self._parse_id_var(), 4638 expressions=self._parse_unnamed_constraints(), 4639 ) 4640 4641 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 4642 constraints = [] 4643 while True: 4644 constraint = self._parse_unnamed_constraint() or self._parse_function() 4645 if not constraint: 4646 break 4647 constraints.append(constraint) 4648 4649 return constraints 4650 4651 def _parse_unnamed_constraint( 4652 self, constraints: t.Optional[t.Collection[str]] = None 4653 ) -> t.Optional[exp.Expression]: 4654 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4655 constraints or self.CONSTRAINT_PARSERS 4656 ): 4657 return None 4658 4659 constraint = self._prev.text.upper() 4660 if constraint not in self.CONSTRAINT_PARSERS: 4661 self.raise_error(f"No parser found for schema constraint {constraint}.") 4662 4663 return self.CONSTRAINT_PARSERS[constraint](self) 4664 4665 
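    # A minimal sketch (illustrative, assuming the default dialect) of where the
    # constraint parsers above attach their output: each parsed constraint becomes
    # an exp.ColumnConstraint with `kind` set, stored on the enclosing exp.ColumnDef.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> col = sqlglot.parse_one("CREATE TABLE t (id INT NOT NULL)").find(exp.ColumnDef)
    #     >>> [c.kind.__class__.__name__ for c in col.args["constraints"]]
    #     ['NotNullColumnConstraint']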
def _parse_unique(self) -> exp.UniqueColumnConstraint: 4666 self._match_text_seq("KEY") 4667 return self.expression( 4668 exp.UniqueColumnConstraint, 4669 this=self._parse_schema(self._parse_id_var(any_token=False)), 4670 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4671 on_conflict=self._parse_on_conflict(), 4672 ) 4673 4674 def _parse_key_constraint_options(self) -> t.List[str]: 4675 options = [] 4676 while True: 4677 if not self._curr: 4678 break 4679 4680 if self._match(TokenType.ON): 4681 action = None 4682 on = self._advance_any() and self._prev.text 4683 4684 if self._match_text_seq("NO", "ACTION"): 4685 action = "NO ACTION" 4686 elif self._match_text_seq("CASCADE"): 4687 action = "CASCADE" 4688 elif self._match_text_seq("RESTRICT"): 4689 action = "RESTRICT" 4690 elif self._match_pair(TokenType.SET, TokenType.NULL): 4691 action = "SET NULL" 4692 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4693 action = "SET DEFAULT" 4694 else: 4695 self.raise_error("Invalid key constraint") 4696 4697 options.append(f"ON {on} {action}") 4698 elif self._match_text_seq("NOT", "ENFORCED"): 4699 options.append("NOT ENFORCED") 4700 elif self._match_text_seq("DEFERRABLE"): 4701 options.append("DEFERRABLE") 4702 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4703 options.append("INITIALLY DEFERRED") 4704 elif self._match_text_seq("NORELY"): 4705 options.append("NORELY") 4706 elif self._match_text_seq("MATCH", "FULL"): 4707 options.append("MATCH FULL") 4708 else: 4709 break 4710 4711 return options 4712 4713 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4714 if match and not self._match(TokenType.REFERENCES): 4715 return None 4716 4717 expressions = None 4718 this = self._parse_table(schema=True) 4719 options = self._parse_key_constraint_options() 4720 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4721 4722 def _parse_foreign_key(self) -> exp.ForeignKey: 4723 expressions = self._parse_wrapped_id_vars() 4724 reference = self._parse_references() 4725 options = {} 4726 4727 while self._match(TokenType.ON): 4728 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4729 self.raise_error("Expected DELETE or UPDATE") 4730 4731 kind = self._prev.text.lower() 4732 4733 if self._match_text_seq("NO", "ACTION"): 4734 action = "NO ACTION" 4735 elif self._match(TokenType.SET): 4736 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4737 action = "SET " + self._prev.text.upper() 4738 else: 4739 self._advance() 4740 action = self._prev.text.upper() 4741 4742 options[kind] = action 4743 4744 return self.expression( 4745 exp.ForeignKey, 4746 expressions=expressions, 4747 reference=reference, 4748 **options, # type: ignore 4749 ) 4750 4751 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4752 return self._parse_field() 4753 4754 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4755 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4756 self._retreat(self._index - 1) 4757 return None 4758 4759 id_vars = self._parse_wrapped_id_vars() 4760 return self.expression( 4761 exp.PeriodForSystemTimeConstraint, 4762 this=seq_get(id_vars, 0), 4763 expression=seq_get(id_vars, 1), 4764 ) 4765 4766 def _parse_primary_key( 4767 self, wrapped_optional: bool = False, in_props: bool = False 4768 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4769 desc = ( 4770 self._match_set((TokenType.ASC, TokenType.DESC)) 4771 and self._prev.token_type 
== TokenType.DESC 4772 ) 4773 4774 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4775 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4776 4777 expressions = self._parse_wrapped_csv( 4778 self._parse_primary_key_part, optional=wrapped_optional 4779 ) 4780 options = self._parse_key_constraint_options() 4781 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4782 4783 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4784 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4785 4786 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4787 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4788 return this 4789 4790 bracket_kind = self._prev.token_type 4791 expressions = self._parse_csv( 4792 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4793 ) 4794 4795 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 4796 self.raise_error("Expected ]") 4797 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 4798 self.raise_error("Expected }") 4799 4800 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4801 if bracket_kind == TokenType.L_BRACE: 4802 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 4803 elif not this or this.name.upper() == "ARRAY": 4804 this = self.expression(exp.Array, expressions=expressions) 4805 else: 4806 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4807 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4808 4809 self._add_comments(this) 4810 return self._parse_bracket(this) 4811 4812 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4813 if self._match(TokenType.COLON): 4814 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4815 return this 4816 4817 def _parse_case(self) -> t.Optional[exp.Expression]: 4818 ifs = [] 4819 default = None 4820 4821 comments = self._prev_comments 4822 expression = self._parse_conjunction() 4823 4824 while self._match(TokenType.WHEN): 4825 this = self._parse_conjunction() 4826 self._match(TokenType.THEN) 4827 then = self._parse_conjunction() 4828 ifs.append(self.expression(exp.If, this=this, true=then)) 4829 4830 if self._match(TokenType.ELSE): 4831 default = self._parse_conjunction() 4832 4833 if not self._match(TokenType.END): 4834 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4835 default = exp.column("interval") 4836 else: 4837 self.raise_error("Expected END after CASE", self._prev) 4838 4839 return self.expression( 4840 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 4841 ) 4842 4843 def _parse_if(self) -> t.Optional[exp.Expression]: 4844 if self._match(TokenType.L_PAREN): 4845 args = self._parse_csv(self._parse_conjunction) 4846 this = self.validate_expression(exp.If.from_arg_list(args), args) 4847 self._match_r_paren() 4848 else: 4849 index = self._index - 1 4850 4851 if self.NO_PAREN_IF_COMMANDS and index == 0: 4852 return self._parse_as_command(self._prev) 4853 4854 condition = self._parse_conjunction() 4855 4856 if not condition: 4857 self._retreat(index) 4858 return None 4859 4860 self._match(TokenType.THEN) 4861 true = self._parse_conjunction() 4862 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4863 
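            # Consume the closing END of the paren-less IF <cond> THEN <expr>
            # [ELSE <expr>] END form; _match is lenient here, so a missing END
            # is tolerated rather than raised.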
self._match(TokenType.END) 4864 this = self.expression(exp.If, this=condition, true=true, false=false) 4865 4866 return this 4867 4868 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4869 if not self._match_text_seq("VALUE", "FOR"): 4870 self._retreat(self._index - 1) 4871 return None 4872 4873 return self.expression( 4874 exp.NextValueFor, 4875 this=self._parse_column(), 4876 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4877 ) 4878 4879 def _parse_extract(self) -> exp.Extract: 4880 this = self._parse_function() or self._parse_var() or self._parse_type() 4881 4882 if self._match(TokenType.FROM): 4883 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4884 4885 if not self._match(TokenType.COMMA): 4886 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4887 4888 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4889 4890 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4891 this = self._parse_conjunction() 4892 4893 if not self._match(TokenType.ALIAS): 4894 if self._match(TokenType.COMMA): 4895 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4896 4897 self.raise_error("Expected AS after CAST") 4898 4899 fmt = None 4900 to = self._parse_types() 4901 4902 if self._match(TokenType.FORMAT): 4903 fmt_string = self._parse_string() 4904 fmt = self._parse_at_time_zone(fmt_string) 4905 4906 if not to: 4907 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4908 if to.this in exp.DataType.TEMPORAL_TYPES: 4909 this = self.expression( 4910 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4911 this=this, 4912 format=exp.Literal.string( 4913 format_time( 4914 fmt_string.this if fmt_string else "", 4915 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 4916 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 4917 ) 4918 ), 4919 ) 4920 4921 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4922 this.set("zone", fmt.args["zone"]) 4923 return this 4924 elif not to: 4925 self.raise_error("Expected TYPE after CAST") 4926 elif isinstance(to, exp.Identifier): 4927 to = exp.DataType.build(to.name, udt=True) 4928 elif to.this == exp.DataType.Type.CHAR: 4929 if self._match(TokenType.CHARACTER_SET): 4930 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4931 4932 return self.expression( 4933 exp.Cast if strict else exp.TryCast, 4934 this=this, 4935 to=to, 4936 format=fmt, 4937 safe=safe, 4938 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 4939 ) 4940 4941 def _parse_string_agg(self) -> exp.Expression: 4942 if self._match(TokenType.DISTINCT): 4943 args: t.List[t.Optional[exp.Expression]] = [ 4944 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4945 ] 4946 if self._match(TokenType.COMMA): 4947 args.extend(self._parse_csv(self._parse_conjunction)) 4948 else: 4949 args = self._parse_csv(self._parse_conjunction) # type: ignore 4950 4951 index = self._index 4952 if not self._match(TokenType.R_PAREN) and args: 4953 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4954 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... 
]] [LIMIT n]) 4955 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4956 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4957 4958 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4959 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4960 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4961 if not self._match_text_seq("WITHIN", "GROUP"): 4962 self._retreat(index) 4963 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4964 4965 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4966 order = self._parse_order(this=seq_get(args, 0)) 4967 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4968 4969 def _parse_convert( 4970 self, strict: bool, safe: t.Optional[bool] = None 4971 ) -> t.Optional[exp.Expression]: 4972 this = self._parse_bitwise() 4973 4974 if self._match(TokenType.USING): 4975 to: t.Optional[exp.Expression] = self.expression( 4976 exp.CharacterSet, this=self._parse_var() 4977 ) 4978 elif self._match(TokenType.COMMA): 4979 to = self._parse_types() 4980 else: 4981 to = None 4982 4983 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4984 4985 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4986 """ 4987 There are generally two variants of the DECODE function: 4988 4989 - DECODE(bin, charset) 4990 - DECODE(expression, search, result [, search, result] ... [, default]) 4991 4992 The second variant will always be parsed into a CASE expression. Note that NULL 4993 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4994 instead of relying on pattern matching. 
4995 """ 4996 args = self._parse_csv(self._parse_conjunction) 4997 4998 if len(args) < 3: 4999 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5000 5001 expression, *expressions = args 5002 if not expression: 5003 return None 5004 5005 ifs = [] 5006 for search, result in zip(expressions[::2], expressions[1::2]): 5007 if not search or not result: 5008 return None 5009 5010 if isinstance(search, exp.Literal): 5011 ifs.append( 5012 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5013 ) 5014 elif isinstance(search, exp.Null): 5015 ifs.append( 5016 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5017 ) 5018 else: 5019 cond = exp.or_( 5020 exp.EQ(this=expression.copy(), expression=search), 5021 exp.and_( 5022 exp.Is(this=expression.copy(), expression=exp.Null()), 5023 exp.Is(this=search.copy(), expression=exp.Null()), 5024 copy=False, 5025 ), 5026 copy=False, 5027 ) 5028 ifs.append(exp.If(this=cond, true=result)) 5029 5030 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5031 5032 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5033 self._match_text_seq("KEY") 5034 key = self._parse_column() 5035 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5036 self._match_text_seq("VALUE") 5037 value = self._parse_bitwise() 5038 5039 if not key and not value: 5040 return None 5041 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5042 5043 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5044 if not this or not self._match_text_seq("FORMAT", "JSON"): 5045 return this 5046 5047 return self.expression(exp.FormatJson, this=this) 5048 5049 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5050 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5051 for value in values: 5052 if self._match_text_seq(value, "ON", on): 5053 return f"{value} ON {on}" 5054 5055 return None 5056 5057 @t.overload 5058 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5059 5060 @t.overload 5061 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5062 5063 def _parse_json_object(self, agg=False): 5064 star = self._parse_star() 5065 expressions = ( 5066 [star] 5067 if star 5068 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5069 ) 5070 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5071 5072 unique_keys = None 5073 if self._match_text_seq("WITH", "UNIQUE"): 5074 unique_keys = True 5075 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5076 unique_keys = False 5077 5078 self._match_text_seq("KEYS") 5079 5080 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5081 self._parse_type() 5082 ) 5083 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5084 5085 return self.expression( 5086 exp.JSONObjectAgg if agg else exp.JSONObject, 5087 expressions=expressions, 5088 null_handling=null_handling, 5089 unique_keys=unique_keys, 5090 return_type=return_type, 5091 encoding=encoding, 5092 ) 5093 5094 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5095 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5096 if not self._match_text_seq("NESTED"): 5097 this = self._parse_id_var() 5098 kind = self._parse_types(allow_identifiers=False) 5099 nested = None 5100 else: 5101 this = None 5102 kind = None 5103 nested = True 5104 5105 path = self._match_text_seq("PATH") and self._parse_string() 5106 nested_schema = nested and self._parse_json_schema() 5107 5108 return self.expression( 5109 exp.JSONColumnDef, 5110 this=this, 5111 kind=kind, 5112 path=path, 5113 nested_schema=nested_schema, 5114 ) 5115 5116 def _parse_json_schema(self) -> exp.JSONSchema: 5117 self._match_text_seq("COLUMNS") 5118 return self.expression( 5119 exp.JSONSchema, 5120 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5121 ) 5122 5123 def _parse_json_table(self) -> exp.JSONTable: 5124 this = self._parse_format_json(self._parse_bitwise()) 5125 path = self._match(TokenType.COMMA) and self._parse_string() 5126 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5127 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5128 schema = self._parse_json_schema() 5129 5130 return exp.JSONTable( 5131 this=this, 5132 schema=schema, 5133 path=path, 5134 error_handling=error_handling, 5135 empty_handling=empty_handling, 5136 ) 5137 5138 def _parse_match_against(self) -> exp.MatchAgainst: 5139 expressions = self._parse_csv(self._parse_column) 5140 5141 self._match_text_seq(")", "AGAINST", "(") 5142 5143 this = self._parse_string() 5144 5145 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5146 modifier = "IN NATURAL LANGUAGE MODE" 5147 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5148 modifier = f"{modifier} WITH QUERY EXPANSION" 5149 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5150 modifier = "IN BOOLEAN MODE" 5151 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5152 modifier = "WITH QUERY EXPANSION" 5153 else: 5154 modifier = None 5155 5156 return self.expression( 5157 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5158 ) 5159 5160 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5161 def _parse_open_json(self) -> exp.OpenJSON: 5162 this = self._parse_bitwise() 5163 path = self._match(TokenType.COMMA) and self._parse_string() 5164 5165 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5166 this = self._parse_field(any_token=True) 5167 kind = self._parse_types() 5168 path = 
self._parse_string() 5169 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5170 5171 return self.expression( 5172 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5173 ) 5174 5175 expressions = None 5176 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5177 self._match_l_paren() 5178 expressions = self._parse_csv(_parse_open_json_column_def) 5179 5180 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5181 5182 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5183 args = self._parse_csv(self._parse_bitwise) 5184 5185 if self._match(TokenType.IN): 5186 return self.expression( 5187 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5188 ) 5189 5190 if haystack_first: 5191 haystack = seq_get(args, 0) 5192 needle = seq_get(args, 1) 5193 else: 5194 needle = seq_get(args, 0) 5195 haystack = seq_get(args, 1) 5196 5197 return self.expression( 5198 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5199 ) 5200 5201 def _parse_predict(self) -> exp.Predict: 5202 self._match_text_seq("MODEL") 5203 this = self._parse_table() 5204 5205 self._match(TokenType.COMMA) 5206 self._match_text_seq("TABLE") 5207 5208 return self.expression( 5209 exp.Predict, 5210 this=this, 5211 expression=self._parse_table(), 5212 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5213 ) 5214 5215 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5216 args = self._parse_csv(self._parse_table) 5217 return exp.JoinHint(this=func_name.upper(), expressions=args) 5218 5219 def _parse_substring(self) -> exp.Substring: 5220 # Postgres supports the form: substring(string [from int] [for int]) 5221 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5222 5223 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5224 5225 if self._match(TokenType.FROM): 5226 args.append(self._parse_bitwise()) 5227 if self._match(TokenType.FOR): 5228 args.append(self._parse_bitwise()) 5229 5230 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5231 5232 def _parse_trim(self) -> exp.Trim: 5233 # https://www.w3resource.com/sql/character-functions/trim.php 5234 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5235 5236 position = None 5237 collation = None 5238 expression = None 5239 5240 if self._match_texts(self.TRIM_TYPES): 5241 position = self._prev.text.upper() 5242 5243 this = self._parse_bitwise() 5244 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5245 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5246 expression = self._parse_bitwise() 5247 5248 if invert_order: 5249 this, expression = expression, this 5250 5251 if self._match(TokenType.COLLATE): 5252 collation = self._parse_bitwise() 5253 5254 return self.expression( 5255 exp.Trim, this=this, position=position, expression=expression, collation=collation 5256 ) 5257 5258 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5259 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5260 5261 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5262 return self._parse_window(self._parse_id_var(), alias=True) 5263 5264 def _parse_respect_or_ignore_nulls( 5265 self, this: t.Optional[exp.Expression] 5266 ) -> t.Optional[exp.Expression]: 5267 if self._match_text_seq("IGNORE", "NULLS"): 5268 return self.expression(exp.IgnoreNulls, this=this) 5269 if 
self._match_text_seq("RESPECT", "NULLS"): 5270 return self.expression(exp.RespectNulls, this=this) 5271 return this 5272 5273 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5274 if self._match(TokenType.HAVING): 5275 self._match_texts(("MAX", "MIN")) 5276 max = self._prev.text.upper() != "MIN" 5277 return self.expression( 5278 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5279 ) 5280 5281 return this 5282 5283 def _parse_window( 5284 self, this: t.Optional[exp.Expression], alias: bool = False 5285 ) -> t.Optional[exp.Expression]: 5286 func = this 5287 comments = func.comments if isinstance(func, exp.Expression) else None 5288 5289 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5290 self._match(TokenType.WHERE) 5291 this = self.expression( 5292 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5293 ) 5294 self._match_r_paren() 5295 5296 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5297 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5298 if self._match_text_seq("WITHIN", "GROUP"): 5299 order = self._parse_wrapped(self._parse_order) 5300 this = self.expression(exp.WithinGroup, this=this, expression=order) 5301 5302 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5303 # Some dialects choose to implement and some do not. 5304 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5305 5306 # There is some code above in _parse_lambda that handles 5307 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5308 5309 # The below changes handle 5310 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5311 5312 # Oracle allows both formats 5313 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5314 # and Snowflake chose to do the same for familiarity 5315 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5316 if isinstance(this, exp.AggFunc): 5317 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5318 5319 if ignore_respect and ignore_respect is not this: 5320 ignore_respect.replace(ignore_respect.this) 5321 this = self.expression(ignore_respect.__class__, this=this) 5322 5323 this = self._parse_respect_or_ignore_nulls(this) 5324 5325 # bigquery select from window x AS (partition by ...) 
5326 if alias: 5327 over = None 5328 self._match(TokenType.ALIAS) 5329 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5330 return this 5331 else: 5332 over = self._prev.text.upper() 5333 5334 if comments: 5335 func.comments = None # type: ignore 5336 5337 if not self._match(TokenType.L_PAREN): 5338 return self.expression( 5339 exp.Window, 5340 comments=comments, 5341 this=this, 5342 alias=self._parse_id_var(False), 5343 over=over, 5344 ) 5345 5346 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5347 5348 first = self._match(TokenType.FIRST) 5349 if self._match_text_seq("LAST"): 5350 first = False 5351 5352 partition, order = self._parse_partition_and_order() 5353 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5354 5355 if kind: 5356 self._match(TokenType.BETWEEN) 5357 start = self._parse_window_spec() 5358 self._match(TokenType.AND) 5359 end = self._parse_window_spec() 5360 5361 spec = self.expression( 5362 exp.WindowSpec, 5363 kind=kind, 5364 start=start["value"], 5365 start_side=start["side"], 5366 end=end["value"], 5367 end_side=end["side"], 5368 ) 5369 else: 5370 spec = None 5371 5372 self._match_r_paren() 5373 5374 window = self.expression( 5375 exp.Window, 5376 comments=comments, 5377 this=this, 5378 partition_by=partition, 5379 order=order, 5380 spec=spec, 5381 alias=window_alias, 5382 over=over, 5383 first=first, 5384 ) 5385 5386 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5387 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5388 return self._parse_window(window, alias=alias) 5389 5390 return window 5391 5392 def _parse_partition_and_order( 5393 self, 5394 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5395 return self._parse_partition_by(), self._parse_order() 5396 5397 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5398 self._match(TokenType.BETWEEN) 5399 5400 return { 5401 "value": ( 5402 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5403 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5404 or self._parse_bitwise() 5405 ), 5406 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5407 } 5408 5409 def _parse_alias( 5410 self, this: t.Optional[exp.Expression], explicit: bool = False 5411 ) -> t.Optional[exp.Expression]: 5412 any_token = self._match(TokenType.ALIAS) 5413 comments = self._prev_comments 5414 5415 if explicit and not any_token: 5416 return this 5417 5418 if self._match(TokenType.L_PAREN): 5419 aliases = self.expression( 5420 exp.Aliases, 5421 comments=comments, 5422 this=this, 5423 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5424 ) 5425 self._match_r_paren(aliases) 5426 return aliases 5427 5428 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5429 self.STRING_ALIASES and self._parse_string_as_identifier() 5430 ) 5431 5432 if alias: 5433 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5434 column = this.this 5435 5436 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5437 if not this.comments and column and column.comments: 5438 this.comments = column.comments 5439 column.comments = None 5440 5441 return this 5442 5443 def _parse_id_var( 5444 self, 5445 any_token: bool = True, 5446 tokens: t.Optional[t.Collection[TokenType]] = None, 5447 ) -> t.Optional[exp.Expression]: 5448 identifier = self._parse_identifier() 5449 5450 if identifier: 5451 return identifier 5452 
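# Fallback (clarifying comment, not in the original source): consume any non-reserved token when
# any_token is set, or one of the keyword tokens (ID_VAR_TOKENS by default) that may double as identifiers.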
5453 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 5454 quoted = self._prev.token_type == TokenType.STRING 5455 return exp.Identifier(this=self._prev.text, quoted=quoted) 5456 5457 return None 5458 5459 def _parse_string(self) -> t.Optional[exp.Expression]: 5460 if self._match_set(self.STRING_PARSERS): 5461 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5462 return self._parse_placeholder() 5463 5464 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5465 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5466 5467 def _parse_number(self) -> t.Optional[exp.Expression]: 5468 if self._match_set(self.NUMERIC_PARSERS): 5469 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5470 return self._parse_placeholder() 5471 5472 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5473 if self._match(TokenType.IDENTIFIER): 5474 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5475 return self._parse_placeholder() 5476 5477 def _parse_var( 5478 self, 5479 any_token: bool = False, 5480 tokens: t.Optional[t.Collection[TokenType]] = None, 5481 upper: bool = False, 5482 ) -> t.Optional[exp.Expression]: 5483 if ( 5484 (any_token and self._advance_any()) 5485 or self._match(TokenType.VAR) 5486 or (self._match_set(tokens) if tokens else False) 5487 ): 5488 return self.expression( 5489 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5490 ) 5491 return self._parse_placeholder() 5492 5493 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5494 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5495 self._advance() 5496 return self._prev 5497 return None 5498 5499 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5500 return self._parse_var() or self._parse_string() 5501 5502 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5503 return self._parse_primary() or self._parse_var(any_token=True) 5504 5505 def _parse_null(self) -> t.Optional[exp.Expression]: 5506 if self._match_set(self.NULL_TOKENS): 5507 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5508 return self._parse_placeholder() 5509 5510 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5511 if self._match(TokenType.TRUE): 5512 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5513 if self._match(TokenType.FALSE): 5514 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5515 return self._parse_placeholder() 5516 5517 def _parse_star(self) -> t.Optional[exp.Expression]: 5518 if self._match(TokenType.STAR): 5519 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5520 return self._parse_placeholder() 5521 5522 def _parse_parameter(self) -> exp.Parameter: 5523 self._match(TokenType.L_BRACE) 5524 this = self._parse_identifier() or self._parse_primary_or_var() 5525 expression = self._match(TokenType.COLON) and ( 5526 self._parse_identifier() or self._parse_primary_or_var() 5527 ) 5528 self._match(TokenType.R_BRACE) 5529 return self.expression(exp.Parameter, this=this, expression=expression) 5530 5531 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5532 if self._match_set(self.PLACEHOLDER_PARSERS): 5533 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5534 if placeholder: 5535 return placeholder 5536 self._advance(-1) 5537 return None 5538 5539 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 
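# Clarifying comment (not in the original source): parses a star exception list, e.g. BigQuery's
# SELECT * EXCEPT (a, b); accepts either a parenthesized column list or a single bare column.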
5540 if not self._match(TokenType.EXCEPT): 5541 return None 5542 if self._match(TokenType.L_PAREN, advance=False): 5543 return self._parse_wrapped_csv(self._parse_column) 5544 5545 except_column = self._parse_column() 5546 return [except_column] if except_column else None 5547 5548 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5549 if not self._match(TokenType.REPLACE): 5550 return None 5551 if self._match(TokenType.L_PAREN, advance=False): 5552 return self._parse_wrapped_csv(self._parse_expression) 5553 5554 replace_expression = self._parse_expression() 5555 return [replace_expression] if replace_expression else None 5556 5557 def _parse_csv( 5558 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5559 ) -> t.List[exp.Expression]: 5560 parse_result = parse_method() 5561 items = [parse_result] if parse_result is not None else [] 5562 5563 while self._match(sep): 5564 self._add_comments(parse_result) 5565 parse_result = parse_method() 5566 if parse_result is not None: 5567 items.append(parse_result) 5568 5569 return items 5570 5571 def _parse_tokens( 5572 self, parse_method: t.Callable, expressions: t.Dict 5573 ) -> t.Optional[exp.Expression]: 5574 this = parse_method() 5575 5576 while self._match_set(expressions): 5577 this = self.expression( 5578 expressions[self._prev.token_type], 5579 this=this, 5580 comments=self._prev_comments, 5581 expression=parse_method(), 5582 ) 5583 5584 return this 5585 5586 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5587 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5588 5589 def _parse_wrapped_csv( 5590 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5591 ) -> t.List[exp.Expression]: 5592 return self._parse_wrapped( 5593 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5594 ) 5595 5596 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5597 wrapped = self._match(TokenType.L_PAREN) 5598 if not wrapped and not optional: 5599 self.raise_error("Expecting (") 5600 parse_result = parse_method() 5601 if wrapped: 5602 self._match_r_paren() 5603 return parse_result 5604 5605 def _parse_expressions(self) -> t.List[exp.Expression]: 5606 return self._parse_csv(self._parse_expression) 5607 5608 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5609 return self._parse_select() or self._parse_set_operations( 5610 self._parse_expression() if alias else self._parse_conjunction() 5611 ) 5612 5613 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5614 return self._parse_query_modifiers( 5615 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5616 ) 5617 5618 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5619 this = None 5620 if self._match_texts(self.TRANSACTION_KIND): 5621 this = self._prev.text 5622 5623 self._match_texts(("TRANSACTION", "WORK")) 5624 5625 modes = [] 5626 while True: 5627 mode = [] 5628 while self._match(TokenType.VAR): 5629 mode.append(self._prev.text) 5630 5631 if mode: 5632 modes.append(" ".join(mode)) 5633 if not self._match(TokenType.COMMA): 5634 break 5635 5636 return self.expression(exp.Transaction, this=this, modes=modes) 5637 5638 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5639 chain = None 5640 savepoint = None 5641 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5642 5643 self._match_texts(("TRANSACTION", "WORK")) 5644 5645 if 
self._match_text_seq("TO"): 5646 self._match_text_seq("SAVEPOINT") 5647 savepoint = self._parse_id_var() 5648 5649 if self._match(TokenType.AND): 5650 chain = not self._match_text_seq("NO") 5651 self._match_text_seq("CHAIN") 5652 5653 if is_rollback: 5654 return self.expression(exp.Rollback, savepoint=savepoint) 5655 5656 return self.expression(exp.Commit, chain=chain) 5657 5658 def _parse_refresh(self) -> exp.Refresh: 5659 self._match(TokenType.TABLE) 5660 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5661 5662 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5663 if not self._match_text_seq("ADD"): 5664 return None 5665 5666 self._match(TokenType.COLUMN) 5667 exists_column = self._parse_exists(not_=True) 5668 expression = self._parse_field_def() 5669 5670 if expression: 5671 expression.set("exists", exists_column) 5672 5673 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5674 if self._match_texts(("FIRST", "AFTER")): 5675 position = self._prev.text 5676 column_position = self.expression( 5677 exp.ColumnPosition, this=self._parse_column(), position=position 5678 ) 5679 expression.set("position", column_position) 5680 5681 return expression 5682 5683 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5684 drop = self._match(TokenType.DROP) and self._parse_drop() 5685 if drop and not isinstance(drop, exp.Command): 5686 drop.set("kind", drop.args.get("kind", "COLUMN")) 5687 return drop 5688 5689 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5690 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5691 return self.expression( 5692 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5693 ) 5694 5695 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5696 index = self._index - 1 5697 5698 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5699 return self._parse_csv( 5700 lambda: self.expression( 5701 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5702 ) 5703 ) 5704 5705 self._retreat(index) 5706 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5707 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5708 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5709 5710 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5711 self._match(TokenType.COLUMN) 5712 column = self._parse_field(any_token=True) 5713 5714 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5715 return self.expression(exp.AlterColumn, this=column, drop=True) 5716 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5717 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5718 if self._match(TokenType.COMMENT): 5719 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5720 5721 self._match_text_seq("SET", "DATA") 5722 self._match_text_seq("TYPE") 5723 return self.expression( 5724 exp.AlterColumn, 5725 this=column, 5726 dtype=self._parse_types(), 5727 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5728 using=self._match(TokenType.USING) and self._parse_conjunction(), 5729 ) 5730 5731 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5732 index = self._index - 1 5733 5734 partition_exists = self._parse_exists() 5735 if self._match(TokenType.PARTITION, advance=False): 5736 return self._parse_csv(lambda: 
self._parse_drop_partition(exists=partition_exists)) 5737 5738 self._retreat(index) 5739 return self._parse_csv(self._parse_drop_column) 5740 5741 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5742 if self._match(TokenType.COLUMN): 5743 exists = self._parse_exists() 5744 old_column = self._parse_column() 5745 to = self._match_text_seq("TO") 5746 new_column = self._parse_column() 5747 5748 if old_column is None or to is None or new_column is None: 5749 return None 5750 5751 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5752 5753 self._match_text_seq("TO") 5754 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5755 5756 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5757 start = self._prev 5758 5759 if not self._match(TokenType.TABLE): 5760 return self._parse_as_command(start) 5761 5762 exists = self._parse_exists() 5763 only = self._match_text_seq("ONLY") 5764 this = self._parse_table(schema=True) 5765 5766 if self._next: 5767 self._advance() 5768 5769 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5770 if parser: 5771 actions = ensure_list(parser(self)) 5772 options = self._parse_csv(self._parse_property) 5773 5774 if not self._curr and actions: 5775 return self.expression( 5776 exp.AlterTable, 5777 this=this, 5778 exists=exists, 5779 actions=actions, 5780 only=only, 5781 options=options, 5782 ) 5783 5784 return self._parse_as_command(start) 5785 5786 def _parse_merge(self) -> exp.Merge: 5787 self._match(TokenType.INTO) 5788 target = self._parse_table() 5789 5790 if target and self._match(TokenType.ALIAS, advance=False): 5791 target.set("alias", self._parse_table_alias()) 5792 5793 self._match(TokenType.USING) 5794 using = self._parse_table() 5795 5796 self._match(TokenType.ON) 5797 on = self._parse_conjunction() 5798 5799 return self.expression( 5800 exp.Merge, 5801 this=target, 5802 using=using, 5803 on=on, 5804 expressions=self._parse_when_matched(), 5805 ) 5806 5807 def _parse_when_matched(self) -> t.List[exp.When]: 5808 whens = [] 5809 5810 while self._match(TokenType.WHEN): 5811 matched = not self._match(TokenType.NOT) 5812 self._match_text_seq("MATCHED") 5813 source = ( 5814 False 5815 if self._match_text_seq("BY", "TARGET") 5816 else self._match_text_seq("BY", "SOURCE") 5817 ) 5818 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5819 5820 self._match(TokenType.THEN) 5821 5822 if self._match(TokenType.INSERT): 5823 _this = self._parse_star() 5824 if _this: 5825 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5826 else: 5827 then = self.expression( 5828 exp.Insert, 5829 this=self._parse_value(), 5830 expression=self._match_text_seq("VALUES") and self._parse_value(), 5831 ) 5832 elif self._match(TokenType.UPDATE): 5833 expressions = self._parse_star() 5834 if expressions: 5835 then = self.expression(exp.Update, expressions=expressions) 5836 else: 5837 then = self.expression( 5838 exp.Update, 5839 expressions=self._match(TokenType.SET) 5840 and self._parse_csv(self._parse_equality), 5841 ) 5842 elif self._match(TokenType.DELETE): 5843 then = self.expression(exp.Var, this=self._prev.text) 5844 else: 5845 then = None 5846 5847 whens.append( 5848 self.expression( 5849 exp.When, 5850 matched=matched, 5851 source=source, 5852 condition=condition, 5853 then=then, 5854 ) 5855 ) 5856 return whens 5857 5858 def _parse_show(self) -> t.Optional[exp.Expression]: 5859 parser = 
self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5860 if parser: 5861 return parser(self) 5862 return self._parse_as_command(self._prev) 5863 5864 def _parse_set_item_assignment( 5865 self, kind: t.Optional[str] = None 5866 ) -> t.Optional[exp.Expression]: 5867 index = self._index 5868 5869 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5870 return self._parse_set_transaction(global_=kind == "GLOBAL") 5871 5872 left = self._parse_primary() or self._parse_id_var() 5873 assignment_delimiter = self._match_texts(("=", "TO")) 5874 5875 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5876 self._retreat(index) 5877 return None 5878 5879 right = self._parse_statement() or self._parse_id_var() 5880 this = self.expression(exp.EQ, this=left, expression=right) 5881 5882 return self.expression(exp.SetItem, this=this, kind=kind) 5883 5884 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5885 self._match_text_seq("TRANSACTION") 5886 characteristics = self._parse_csv( 5887 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5888 ) 5889 return self.expression( 5890 exp.SetItem, 5891 expressions=characteristics, 5892 kind="TRANSACTION", 5893 **{"global": global_}, # type: ignore 5894 ) 5895 5896 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5897 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5898 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5899 5900 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5901 index = self._index 5902 set_ = self.expression( 5903 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5904 ) 5905 5906 if self._curr: 5907 self._retreat(index) 5908 return self._parse_as_command(self._prev) 5909 5910 return set_ 5911 5912 def _parse_var_from_options( 5913 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 5914 ) -> t.Optional[exp.Var]: 5915 start = self._curr 5916 if not start: 5917 return None 5918 5919 option = start.text.upper() 5920 continuations = options.get(option) 5921 5922 index = self._index 5923 self._advance() 5924 for keywords in continuations or []: 5925 if isinstance(keywords, str): 5926 keywords = (keywords,) 5927 5928 if self._match_text_seq(*keywords): 5929 option = f"{option} {' '.join(keywords)}" 5930 break 5931 else: 5932 if continuations or continuations is None: 5933 if raise_unmatched: 5934 self.raise_error(f"Unknown option {option}") 5935 5936 self._retreat(index) 5937 return None 5938 5939 return exp.var(option) 5940 5941 def _parse_as_command(self, start: Token) -> exp.Command: 5942 while self._curr: 5943 self._advance() 5944 text = self._find_sql(start, self._prev) 5945 size = len(start.text) 5946 self._warn_unsupported() 5947 return exp.Command(this=text[:size], expression=text[size:]) 5948 5949 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5950 settings = [] 5951 5952 self._match_l_paren() 5953 kind = self._parse_id_var() 5954 5955 if self._match(TokenType.L_PAREN): 5956 while True: 5957 key = self._parse_id_var() 5958 value = self._parse_primary() 5959 5960 if not key and value is None: 5961 break 5962 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5963 self._match(TokenType.R_PAREN) 5964 5965 self._match_r_paren() 5966 5967 return self.expression( 5968 exp.DictProperty, 5969 this=this, 5970 kind=kind.this if kind else None, 5971 settings=settings, 5972 ) 5973 5974 def 
_parse_dict_range(self, this: str) -> exp.DictRange: 5975 self._match_l_paren() 5976 has_min = self._match_text_seq("MIN") 5977 if has_min: 5978 min = self._parse_var() or self._parse_primary() 5979 self._match_text_seq("MAX") 5980 max = self._parse_var() or self._parse_primary() 5981 else: 5982 max = self._parse_var() or self._parse_primary() 5983 min = exp.Literal.number(0) 5984 self._match_r_paren() 5985 return self.expression(exp.DictRange, this=this, min=min, max=max) 5986 5987 def _parse_comprehension( 5988 self, this: t.Optional[exp.Expression] 5989 ) -> t.Optional[exp.Comprehension]: 5990 index = self._index 5991 expression = self._parse_column() 5992 if not self._match(TokenType.IN): 5993 self._retreat(index - 1) 5994 return None 5995 iterator = self._parse_column() 5996 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5997 return self.expression( 5998 exp.Comprehension, 5999 this=this, 6000 expression=expression, 6001 iterator=iterator, 6002 condition=condition, 6003 ) 6004 6005 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6006 if self._match(TokenType.HEREDOC_STRING): 6007 return self.expression(exp.Heredoc, this=self._prev.text) 6008 6009 if not self._match_text_seq("$"): 6010 return None 6011 6012 tags = ["$"] 6013 tag_text = None 6014 6015 if self._is_connected(): 6016 self._advance() 6017 tags.append(self._prev.text.upper()) 6018 else: 6019 self.raise_error("No closing $ found") 6020 6021 if tags[-1] != "$": 6022 if self._is_connected() and self._match_text_seq("$"): 6023 tag_text = tags[-1] 6024 tags.append("$") 6025 else: 6026 self.raise_error("No closing $ found") 6027 6028 heredoc_start = self._curr 6029 6030 while self._curr: 6031 if self._match_text_seq(*tags, advance=False): 6032 this = self._find_sql(heredoc_start, self._prev) 6033 self._advance(len(tags)) 6034 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6035 6036 self._advance() 6037 6038 self.raise_error(f"No closing {''.join(tags)} found") 6039 return None 6040 6041 def _find_parser( 6042 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6043 ) -> t.Optional[t.Callable]: 6044 if not self._curr: 6045 return None 6046 6047 index = self._index 6048 this = [] 6049 while True: 6050 # The current token might be multiple words 6051 curr = self._curr.text.upper() 6052 key = curr.split(" ") 6053 this.append(curr) 6054 6055 self._advance() 6056 result, trie = in_trie(trie, key) 6057 if result == TrieResult.FAILED: 6058 break 6059 6060 if result == TrieResult.EXISTS: 6061 subparser = parsers[" ".join(this)] 6062 return subparser 6063 6064 self._retreat(index) 6065 return None 6066 6067 def _match(self, token_type, advance=True, expression=None): 6068 if not self._curr: 6069 return None 6070 6071 if self._curr.token_type == token_type: 6072 if advance: 6073 self._advance() 6074 self._add_comments(expression) 6075 return True 6076 6077 return None 6078 6079 def _match_set(self, types, advance=True): 6080 if not self._curr: 6081 return None 6082 6083 if self._curr.token_type in types: 6084 if advance: 6085 self._advance() 6086 return True 6087 6088 return None 6089 6090 def _match_pair(self, token_type_a, token_type_b, advance=True): 6091 if not self._curr or not self._next: 6092 return None 6093 6094 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6095 if advance: 6096 self._advance(2) 6097 return True 6098 6099 return None 6100 6101 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6102 if not 
self._match(TokenType.L_PAREN, expression=expression): 6103 self.raise_error("Expecting (") 6104 6105 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6106 if not self._match(TokenType.R_PAREN, expression=expression): 6107 self.raise_error("Expecting )") 6108 6109 def _match_texts(self, texts, advance=True): 6110 if self._curr and self._curr.text.upper() in texts: 6111 if advance: 6112 self._advance() 6113 return True 6114 return None 6115 6116 def _match_text_seq(self, *texts, advance=True): 6117 index = self._index 6118 for text in texts: 6119 if self._curr and self._curr.text.upper() == text: 6120 self._advance() 6121 else: 6122 self._retreat(index) 6123 return None 6124 6125 if not advance: 6126 self._retreat(index) 6127 6128 return True 6129 6130 def _replace_lambda( 6131 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6132 ) -> t.Optional[exp.Expression]: 6133 if not node: 6134 return node 6135 6136 for column in node.find_all(exp.Column): 6137 if column.parts[0].name in lambda_variables: 6138 dot_or_id = column.to_dot() if column.table else column.this 6139 parent = column.parent 6140 6141 while isinstance(parent, exp.Dot): 6142 if not isinstance(parent.parent, exp.Dot): 6143 parent.replace(dot_or_id) 6144 break 6145 parent = parent.parent 6146 else: 6147 if column is node: 6148 node = dot_or_id 6149 else: 6150 column.replace(dot_or_id) 6151 return node 6152 6153 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6154 start = self._prev 6155 6156 # Not to be confused with TRUNCATE(number, decimals) function call 6157 if self._match(TokenType.L_PAREN): 6158 self._retreat(self._index - 2) 6159 return self._parse_function() 6160 6161 # Clickhouse supports TRUNCATE DATABASE as well 6162 is_database = self._match(TokenType.DATABASE) 6163 6164 self._match(TokenType.TABLE) 6165 6166 exists = self._parse_exists(not_=False) 6167 6168 expressions = self._parse_csv( 6169 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6170 ) 6171 6172 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6173 6174 if self._match_text_seq("RESTART", "IDENTITY"): 6175 identity = "RESTART" 6176 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6177 identity = "CONTINUE" 6178 else: 6179 identity = None 6180 6181 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6182 option = self._prev.text 6183 else: 6184 option = None 6185 6186 partition = self._parse_partition() 6187 6188 # Fallback case 6189 if self._curr: 6190 return self._parse_as_command(start) 6191 6192 return self.expression( 6193 exp.TruncateTable, 6194 expressions=expressions, 6195 is_database=is_database, 6196 exists=exists, 6197 cluster=cluster, 6198 identity=identity, 6199 option=option, 6200 partition=partition, 6201 ) 6202 6203 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6204 this = self._parse_ordered(self._parse_opclass) 6205 6206 if not self._match(TokenType.WITH): 6207 return this 6208 6209 op = self._parse_var(any_token=True) 6210 6211 return self.expression(exp.WithOperator, this=this, op=op)
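
The methods above are internal and are normally reached through sqlglot's public entry points. Below is a minimal usage sketch (not part of the module source) showing a few constructs handled in this section — TRIM with a trim type, SUBSTRING ... FROM ... FOR, an explicit window frame, and MERGE ... WHEN MATCHED — being parsed into the expression nodes referenced above; the SQL strings and assertions are illustrative only:

import sqlglot
from sqlglot import exp

# TRIM(LEADING ...) flows through _parse_trim, which records the trim type
trim = sqlglot.parse_one("SELECT TRIM(LEADING 'x' FROM col) FROM t")
assert trim.find(exp.Trim).args["position"] == "LEADING"

# SUBSTRING(string FROM start FOR length) flows through _parse_substring
sub = sqlglot.parse_one("SELECT SUBSTRING(col FROM 2 FOR 3) FROM t")
assert isinstance(sub.find(exp.Substring), exp.Substring)

# An explicit frame produces an exp.WindowSpec via _parse_window_spec
win = sqlglot.parse_one(
    "SELECT SUM(x) OVER (ORDER BY y ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM t"
)
assert isinstance(win.find(exp.Window).args["spec"], exp.WindowSpec)

# MERGE builds an exp.Merge whose expressions are exp.When nodes from _parse_when_matched
merge = sqlglot.parse_one("MERGE INTO t USING s ON t.id = s.id WHEN MATCHED THEN DELETE")
assert all(isinstance(w, exp.When) for w in merge.expressions)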
88class Parser(metaclass=_Parser): 89 """ 90 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 91 92 Args: 93 error_level: The desired error level. 94 Default: ErrorLevel.IMMEDIATE 95 error_message_context: The amount of context to capture from a query string when displaying 96 the error message (in number of characters). 97 Default: 100 98 max_errors: Maximum number of error messages to include in a raised ParseError. 99 This is only relevant if error_level is ErrorLevel.RAISE. 100 Default: 3 101 """ 102 103 FUNCTIONS: t.Dict[str, t.Callable] = { 104 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 105 "CONCAT": lambda args, dialect: exp.Concat( 106 expressions=args, 107 safe=not dialect.STRICT_STRING_CONCAT, 108 coalesce=dialect.CONCAT_COALESCE, 109 ), 110 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 111 expressions=args, 112 safe=not dialect.STRICT_STRING_CONCAT, 113 coalesce=dialect.CONCAT_COALESCE, 114 ), 115 "DATE_TO_DATE_STR": lambda args: exp.Cast( 116 this=seq_get(args, 0), 117 to=exp.DataType(this=exp.DataType.Type.TEXT), 118 ), 119 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 120 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 121 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 122 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 123 "LIKE": build_like, 124 "LOG": build_logarithm, 125 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 126 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 127 "MOD": lambda args: exp.Mod(this=seq_get(args, 0), expression=seq_get(args, 1)), 128 "TIME_TO_TIME_STR": lambda args: exp.Cast( 129 this=seq_get(args, 0), 130 to=exp.DataType(this=exp.DataType.Type.TEXT), 131 ), 132 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 133 this=exp.Cast( 134 this=seq_get(args, 0), 135 to=exp.DataType(this=exp.DataType.Type.TEXT), 136 ), 137 start=exp.Literal.number(1), 138 length=exp.Literal.number(10), 139 ), 140 "VAR_MAP": build_var_map, 141 } 142 143 NO_PAREN_FUNCTIONS = { 144 TokenType.CURRENT_DATE: exp.CurrentDate, 145 TokenType.CURRENT_DATETIME: exp.CurrentDate, 146 TokenType.CURRENT_TIME: exp.CurrentTime, 147 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 148 TokenType.CURRENT_USER: exp.CurrentUser, 149 } 150 151 STRUCT_TYPE_TOKENS = { 152 TokenType.NESTED, 153 TokenType.OBJECT, 154 TokenType.STRUCT, 155 } 156 157 NESTED_TYPE_TOKENS = { 158 TokenType.ARRAY, 159 TokenType.LOWCARDINALITY, 160 TokenType.MAP, 161 TokenType.NULLABLE, 162 *STRUCT_TYPE_TOKENS, 163 } 164 165 ENUM_TYPE_TOKENS = { 166 TokenType.ENUM, 167 TokenType.ENUM8, 168 TokenType.ENUM16, 169 } 170 171 AGGREGATE_TYPE_TOKENS = { 172 TokenType.AGGREGATEFUNCTION, 173 TokenType.SIMPLEAGGREGATEFUNCTION, 174 } 175 176 TYPE_TOKENS = { 177 TokenType.BIT, 178 TokenType.BOOLEAN, 179 TokenType.TINYINT, 180 TokenType.UTINYINT, 181 TokenType.SMALLINT, 182 TokenType.USMALLINT, 183 TokenType.INT, 184 TokenType.UINT, 185 TokenType.BIGINT, 186 TokenType.UBIGINT, 187 TokenType.INT128, 188 TokenType.UINT128, 189 TokenType.INT256, 190 TokenType.UINT256, 191 TokenType.MEDIUMINT, 192 TokenType.UMEDIUMINT, 193 TokenType.FIXEDSTRING, 194 TokenType.FLOAT, 195 TokenType.DOUBLE, 196 TokenType.CHAR, 197 TokenType.NCHAR, 198 TokenType.VARCHAR, 199 TokenType.NVARCHAR, 200 TokenType.BPCHAR, 201 TokenType.TEXT, 202 TokenType.MEDIUMTEXT, 203 TokenType.LONGTEXT, 
204 TokenType.MEDIUMBLOB, 205 TokenType.LONGBLOB, 206 TokenType.BINARY, 207 TokenType.VARBINARY, 208 TokenType.JSON, 209 TokenType.JSONB, 210 TokenType.INTERVAL, 211 TokenType.TINYBLOB, 212 TokenType.TINYTEXT, 213 TokenType.TIME, 214 TokenType.TIMETZ, 215 TokenType.TIMESTAMP, 216 TokenType.TIMESTAMP_S, 217 TokenType.TIMESTAMP_MS, 218 TokenType.TIMESTAMP_NS, 219 TokenType.TIMESTAMPTZ, 220 TokenType.TIMESTAMPLTZ, 221 TokenType.DATETIME, 222 TokenType.DATETIME64, 223 TokenType.DATE, 224 TokenType.DATE32, 225 TokenType.INT4RANGE, 226 TokenType.INT4MULTIRANGE, 227 TokenType.INT8RANGE, 228 TokenType.INT8MULTIRANGE, 229 TokenType.NUMRANGE, 230 TokenType.NUMMULTIRANGE, 231 TokenType.TSRANGE, 232 TokenType.TSMULTIRANGE, 233 TokenType.TSTZRANGE, 234 TokenType.TSTZMULTIRANGE, 235 TokenType.DATERANGE, 236 TokenType.DATEMULTIRANGE, 237 TokenType.DECIMAL, 238 TokenType.UDECIMAL, 239 TokenType.BIGDECIMAL, 240 TokenType.UUID, 241 TokenType.GEOGRAPHY, 242 TokenType.GEOMETRY, 243 TokenType.HLLSKETCH, 244 TokenType.HSTORE, 245 TokenType.PSEUDO_TYPE, 246 TokenType.SUPER, 247 TokenType.SERIAL, 248 TokenType.SMALLSERIAL, 249 TokenType.BIGSERIAL, 250 TokenType.XML, 251 TokenType.YEAR, 252 TokenType.UNIQUEIDENTIFIER, 253 TokenType.USERDEFINED, 254 TokenType.MONEY, 255 TokenType.SMALLMONEY, 256 TokenType.ROWVERSION, 257 TokenType.IMAGE, 258 TokenType.VARIANT, 259 TokenType.OBJECT, 260 TokenType.OBJECT_IDENTIFIER, 261 TokenType.INET, 262 TokenType.IPADDRESS, 263 TokenType.IPPREFIX, 264 TokenType.IPV4, 265 TokenType.IPV6, 266 TokenType.UNKNOWN, 267 TokenType.NULL, 268 TokenType.NAME, 269 *ENUM_TYPE_TOKENS, 270 *NESTED_TYPE_TOKENS, 271 *AGGREGATE_TYPE_TOKENS, 272 } 273 274 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 275 TokenType.BIGINT: TokenType.UBIGINT, 276 TokenType.INT: TokenType.UINT, 277 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 278 TokenType.SMALLINT: TokenType.USMALLINT, 279 TokenType.TINYINT: TokenType.UTINYINT, 280 TokenType.DECIMAL: TokenType.UDECIMAL, 281 } 282 283 SUBQUERY_PREDICATES = { 284 TokenType.ANY: exp.Any, 285 TokenType.ALL: exp.All, 286 TokenType.EXISTS: exp.Exists, 287 TokenType.SOME: exp.Any, 288 } 289 290 RESERVED_TOKENS = { 291 *Tokenizer.SINGLE_TOKENS.values(), 292 TokenType.SELECT, 293 } 294 295 DB_CREATABLES = { 296 TokenType.DATABASE, 297 TokenType.SCHEMA, 298 TokenType.TABLE, 299 TokenType.VIEW, 300 TokenType.MODEL, 301 TokenType.DICTIONARY, 302 TokenType.SEQUENCE, 303 TokenType.STORAGE_INTEGRATION, 304 } 305 306 CREATABLES = { 307 TokenType.COLUMN, 308 TokenType.CONSTRAINT, 309 TokenType.FUNCTION, 310 TokenType.INDEX, 311 TokenType.PROCEDURE, 312 TokenType.FOREIGN_KEY, 313 *DB_CREATABLES, 314 } 315 316 # Tokens that can represent identifiers 317 ID_VAR_TOKENS = { 318 TokenType.VAR, 319 TokenType.ANTI, 320 TokenType.APPLY, 321 TokenType.ASC, 322 TokenType.ASOF, 323 TokenType.AUTO_INCREMENT, 324 TokenType.BEGIN, 325 TokenType.BPCHAR, 326 TokenType.CACHE, 327 TokenType.CASE, 328 TokenType.COLLATE, 329 TokenType.COMMAND, 330 TokenType.COMMENT, 331 TokenType.COMMIT, 332 TokenType.CONSTRAINT, 333 TokenType.DEFAULT, 334 TokenType.DELETE, 335 TokenType.DESC, 336 TokenType.DESCRIBE, 337 TokenType.DICTIONARY, 338 TokenType.DIV, 339 TokenType.END, 340 TokenType.EXECUTE, 341 TokenType.ESCAPE, 342 TokenType.FALSE, 343 TokenType.FIRST, 344 TokenType.FILTER, 345 TokenType.FINAL, 346 TokenType.FORMAT, 347 TokenType.FULL, 348 TokenType.IS, 349 TokenType.ISNULL, 350 TokenType.INTERVAL, 351 TokenType.KEEP, 352 TokenType.KILL, 353 TokenType.LEFT, 354 TokenType.LOAD, 355 TokenType.MERGE, 356 TokenType.NATURAL, 
357 TokenType.NEXT, 358 TokenType.OFFSET, 359 TokenType.OPERATOR, 360 TokenType.ORDINALITY, 361 TokenType.OVERLAPS, 362 TokenType.OVERWRITE, 363 TokenType.PARTITION, 364 TokenType.PERCENT, 365 TokenType.PIVOT, 366 TokenType.PRAGMA, 367 TokenType.RANGE, 368 TokenType.RECURSIVE, 369 TokenType.REFERENCES, 370 TokenType.REFRESH, 371 TokenType.REPLACE, 372 TokenType.RIGHT, 373 TokenType.ROW, 374 TokenType.ROWS, 375 TokenType.SEMI, 376 TokenType.SET, 377 TokenType.SETTINGS, 378 TokenType.SHOW, 379 TokenType.TEMPORARY, 380 TokenType.TOP, 381 TokenType.TRUE, 382 TokenType.TRUNCATE, 383 TokenType.UNIQUE, 384 TokenType.UNPIVOT, 385 TokenType.UPDATE, 386 TokenType.USE, 387 TokenType.VOLATILE, 388 TokenType.WINDOW, 389 *CREATABLES, 390 *SUBQUERY_PREDICATES, 391 *TYPE_TOKENS, 392 *NO_PAREN_FUNCTIONS, 393 } 394 395 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 396 397 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 398 TokenType.ANTI, 399 TokenType.APPLY, 400 TokenType.ASOF, 401 TokenType.FULL, 402 TokenType.LEFT, 403 TokenType.LOCK, 404 TokenType.NATURAL, 405 TokenType.OFFSET, 406 TokenType.RIGHT, 407 TokenType.SEMI, 408 TokenType.WINDOW, 409 } 410 411 ALIAS_TOKENS = ID_VAR_TOKENS 412 413 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 414 415 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 416 417 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 418 419 FUNC_TOKENS = { 420 TokenType.COLLATE, 421 TokenType.COMMAND, 422 TokenType.CURRENT_DATE, 423 TokenType.CURRENT_DATETIME, 424 TokenType.CURRENT_TIMESTAMP, 425 TokenType.CURRENT_TIME, 426 TokenType.CURRENT_USER, 427 TokenType.FILTER, 428 TokenType.FIRST, 429 TokenType.FORMAT, 430 TokenType.GLOB, 431 TokenType.IDENTIFIER, 432 TokenType.INDEX, 433 TokenType.ISNULL, 434 TokenType.ILIKE, 435 TokenType.INSERT, 436 TokenType.LIKE, 437 TokenType.MERGE, 438 TokenType.OFFSET, 439 TokenType.PRIMARY_KEY, 440 TokenType.RANGE, 441 TokenType.REPLACE, 442 TokenType.RLIKE, 443 TokenType.ROW, 444 TokenType.UNNEST, 445 TokenType.VAR, 446 TokenType.LEFT, 447 TokenType.RIGHT, 448 TokenType.SEQUENCE, 449 TokenType.DATE, 450 TokenType.DATETIME, 451 TokenType.TABLE, 452 TokenType.TIMESTAMP, 453 TokenType.TIMESTAMPTZ, 454 TokenType.TRUNCATE, 455 TokenType.WINDOW, 456 TokenType.XOR, 457 *TYPE_TOKENS, 458 *SUBQUERY_PREDICATES, 459 } 460 461 CONJUNCTION = { 462 TokenType.AND: exp.And, 463 TokenType.OR: exp.Or, 464 } 465 466 EQUALITY = { 467 TokenType.COLON_EQ: exp.PropertyEQ, 468 TokenType.EQ: exp.EQ, 469 TokenType.NEQ: exp.NEQ, 470 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 471 } 472 473 COMPARISON = { 474 TokenType.GT: exp.GT, 475 TokenType.GTE: exp.GTE, 476 TokenType.LT: exp.LT, 477 TokenType.LTE: exp.LTE, 478 } 479 480 BITWISE = { 481 TokenType.AMP: exp.BitwiseAnd, 482 TokenType.CARET: exp.BitwiseXor, 483 TokenType.PIPE: exp.BitwiseOr, 484 } 485 486 TERM = { 487 TokenType.DASH: exp.Sub, 488 TokenType.PLUS: exp.Add, 489 TokenType.MOD: exp.Mod, 490 TokenType.COLLATE: exp.Collate, 491 } 492 493 FACTOR = { 494 TokenType.DIV: exp.IntDiv, 495 TokenType.LR_ARROW: exp.Distance, 496 TokenType.SLASH: exp.Div, 497 TokenType.STAR: exp.Mul, 498 } 499 500 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 501 502 TIMES = { 503 TokenType.TIME, 504 TokenType.TIMETZ, 505 } 506 507 TIMESTAMPS = { 508 TokenType.TIMESTAMP, 509 TokenType.TIMESTAMPTZ, 510 TokenType.TIMESTAMPLTZ, 511 *TIMES, 512 } 513 514 SET_OPERATIONS = { 515 TokenType.UNION, 516 TokenType.INTERSECT, 517 TokenType.EXCEPT, 518 } 519 520 JOIN_METHODS = { 521 TokenType.ASOF, 522 TokenType.NATURAL, 523 
TokenType.POSITIONAL, 524 } 525 526 JOIN_SIDES = { 527 TokenType.LEFT, 528 TokenType.RIGHT, 529 TokenType.FULL, 530 } 531 532 JOIN_KINDS = { 533 TokenType.INNER, 534 TokenType.OUTER, 535 TokenType.CROSS, 536 TokenType.SEMI, 537 TokenType.ANTI, 538 } 539 540 JOIN_HINTS: t.Set[str] = set() 541 542 LAMBDAS = { 543 TokenType.ARROW: lambda self, expressions: self.expression( 544 exp.Lambda, 545 this=self._replace_lambda( 546 self._parse_conjunction(), 547 {node.name for node in expressions}, 548 ), 549 expressions=expressions, 550 ), 551 TokenType.FARROW: lambda self, expressions: self.expression( 552 exp.Kwarg, 553 this=exp.var(expressions[0].name), 554 expression=self._parse_conjunction(), 555 ), 556 } 557 558 COLUMN_OPERATORS = { 559 TokenType.DOT: None, 560 TokenType.DCOLON: lambda self, this, to: self.expression( 561 exp.Cast if self.STRICT_CAST else exp.TryCast, 562 this=this, 563 to=to, 564 ), 565 TokenType.ARROW: lambda self, this, path: self.expression( 566 exp.JSONExtract, 567 this=this, 568 expression=self.dialect.to_json_path(path), 569 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 570 ), 571 TokenType.DARROW: lambda self, this, path: self.expression( 572 exp.JSONExtractScalar, 573 this=this, 574 expression=self.dialect.to_json_path(path), 575 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 576 ), 577 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 578 exp.JSONBExtract, 579 this=this, 580 expression=path, 581 ), 582 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 583 exp.JSONBExtractScalar, 584 this=this, 585 expression=path, 586 ), 587 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 588 exp.JSONBContains, 589 this=this, 590 expression=key, 591 ), 592 } 593 594 EXPRESSION_PARSERS = { 595 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 596 exp.Column: lambda self: self._parse_column(), 597 exp.Condition: lambda self: self._parse_conjunction(), 598 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 599 exp.Expression: lambda self: self._parse_expression(), 600 exp.From: lambda self: self._parse_from(), 601 exp.Group: lambda self: self._parse_group(), 602 exp.Having: lambda self: self._parse_having(), 603 exp.Identifier: lambda self: self._parse_id_var(), 604 exp.Join: lambda self: self._parse_join(), 605 exp.Lambda: lambda self: self._parse_lambda(), 606 exp.Lateral: lambda self: self._parse_lateral(), 607 exp.Limit: lambda self: self._parse_limit(), 608 exp.Offset: lambda self: self._parse_offset(), 609 exp.Order: lambda self: self._parse_order(), 610 exp.Ordered: lambda self: self._parse_ordered(), 611 exp.Properties: lambda self: self._parse_properties(), 612 exp.Qualify: lambda self: self._parse_qualify(), 613 exp.Returning: lambda self: self._parse_returning(), 614 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 615 exp.Table: lambda self: self._parse_table_parts(), 616 exp.TableAlias: lambda self: self._parse_table_alias(), 617 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 618 exp.Where: lambda self: self._parse_where(), 619 exp.Window: lambda self: self._parse_named_window(), 620 exp.With: lambda self: self._parse_with(), 621 "JOIN_TYPE": lambda self: self._parse_join_parts(), 622 } 623 624 STATEMENT_PARSERS = { 625 TokenType.ALTER: lambda self: self._parse_alter(), 626 TokenType.BEGIN: lambda self: self._parse_transaction(), 627 TokenType.CACHE: lambda self: self._parse_cache(), 628 TokenType.COMMENT: lambda self: 
self._parse_comment(), 629 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 630 TokenType.CREATE: lambda self: self._parse_create(), 631 TokenType.DELETE: lambda self: self._parse_delete(), 632 TokenType.DESC: lambda self: self._parse_describe(), 633 TokenType.DESCRIBE: lambda self: self._parse_describe(), 634 TokenType.DROP: lambda self: self._parse_drop(), 635 TokenType.INSERT: lambda self: self._parse_insert(), 636 TokenType.KILL: lambda self: self._parse_kill(), 637 TokenType.LOAD: lambda self: self._parse_load(), 638 TokenType.MERGE: lambda self: self._parse_merge(), 639 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 640 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 641 TokenType.REFRESH: lambda self: self._parse_refresh(), 642 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 643 TokenType.SET: lambda self: self._parse_set(), 644 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 645 TokenType.UNCACHE: lambda self: self._parse_uncache(), 646 TokenType.UPDATE: lambda self: self._parse_update(), 647 TokenType.USE: lambda self: self.expression( 648 exp.Use, 649 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 650 this=self._parse_table(schema=False), 651 ), 652 } 653 654 UNARY_PARSERS = { 655 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 656 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 657 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 658 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 659 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 660 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 661 } 662 663 STRING_PARSERS = { 664 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 665 exp.RawString, this=token.text 666 ), 667 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 668 exp.National, this=token.text 669 ), 670 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 671 TokenType.STRING: lambda self, token: self.expression( 672 exp.Literal, this=token.text, is_string=True 673 ), 674 TokenType.UNICODE_STRING: lambda self, token: self.expression( 675 exp.UnicodeString, 676 this=token.text, 677 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 678 ), 679 } 680 681 NUMERIC_PARSERS = { 682 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 683 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 684 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 685 TokenType.NUMBER: lambda self, token: self.expression( 686 exp.Literal, this=token.text, is_string=False 687 ), 688 } 689 690 PRIMARY_PARSERS = { 691 **STRING_PARSERS, 692 **NUMERIC_PARSERS, 693 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 694 TokenType.NULL: lambda self, _: self.expression(exp.Null), 695 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 696 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 697 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 698 TokenType.STAR: lambda self, _: self.expression( 699 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 
700 ), 701 } 702 703 PLACEHOLDER_PARSERS = { 704 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 705 TokenType.PARAMETER: lambda self: self._parse_parameter(), 706 TokenType.COLON: lambda self: ( 707 self.expression(exp.Placeholder, this=self._prev.text) 708 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 709 else None 710 ), 711 } 712 713 RANGE_PARSERS = { 714 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 715 TokenType.GLOB: binary_range_parser(exp.Glob), 716 TokenType.ILIKE: binary_range_parser(exp.ILike), 717 TokenType.IN: lambda self, this: self._parse_in(this), 718 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 719 TokenType.IS: lambda self, this: self._parse_is(this), 720 TokenType.LIKE: binary_range_parser(exp.Like), 721 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 722 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 723 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 724 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 725 } 726 727 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 728 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 729 "AUTO": lambda self: self._parse_auto_property(), 730 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 731 "BACKUP": lambda self: self.expression( 732 exp.BackupProperty, this=self._parse_var(any_token=True) 733 ), 734 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 735 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 736 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 737 "CHECKSUM": lambda self: self._parse_checksum(), 738 "CLUSTER BY": lambda self: self._parse_cluster(), 739 "CLUSTERED": lambda self: self._parse_clustered_by(), 740 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 741 exp.CollateProperty, **kwargs 742 ), 743 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 744 "CONTAINS": lambda self: self._parse_contains_property(), 745 "COPY": lambda self: self._parse_copy_property(), 746 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 747 "DEFINER": lambda self: self._parse_definer(), 748 "DETERMINISTIC": lambda self: self.expression( 749 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 750 ), 751 "DISTKEY": lambda self: self._parse_distkey(), 752 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 753 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 754 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 755 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 756 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 757 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 758 "FREESPACE": lambda self: self._parse_freespace(), 759 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 760 "HEAP": lambda self: self.expression(exp.HeapProperty), 761 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 762 "IMMUTABLE": lambda self: self.expression( 763 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 764 ), 765 "INHERITS": lambda self: self.expression( 766 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 767 ), 768 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 769 "JOURNAL": 
lambda self, **kwargs: self._parse_journal(**kwargs), 770 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 771 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 772 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 773 "LIKE": lambda self: self._parse_create_like(), 774 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 775 "LOCK": lambda self: self._parse_locking(), 776 "LOCKING": lambda self: self._parse_locking(), 777 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 778 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 779 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 780 "MODIFIES": lambda self: self._parse_modifies_property(), 781 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 782 "NO": lambda self: self._parse_no_property(), 783 "ON": lambda self: self._parse_on_property(), 784 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 785 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 786 "PARTITION": lambda self: self._parse_partitioned_of(), 787 "PARTITION BY": lambda self: self._parse_partitioned_by(), 788 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 789 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 790 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 791 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 792 "READS": lambda self: self._parse_reads_property(), 793 "REMOTE": lambda self: self._parse_remote_with_connection(), 794 "RETURNS": lambda self: self._parse_returns(), 795 "ROW": lambda self: self._parse_row(), 796 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 797 "SAMPLE": lambda self: self.expression( 798 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 799 ), 800 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 801 "SETTINGS": lambda self: self.expression( 802 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 803 ), 804 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 805 "SORTKEY": lambda self: self._parse_sortkey(), 806 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 807 "STABLE": lambda self: self.expression( 808 exp.StabilityProperty, this=exp.Literal.string("STABLE") 809 ), 810 "STORED": lambda self: self._parse_stored(), 811 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 812 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 813 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 814 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 815 "TO": lambda self: self._parse_to_table(), 816 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 817 "TRANSFORM": lambda self: self.expression( 818 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 819 ), 820 "TTL": lambda self: self._parse_ttl(), 821 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 822 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 823 "VOLATILE": lambda self: self._parse_volatile_property(), 824 "WITH": lambda self: self._parse_with_property(), 825 } 826 827 CONSTRAINT_PARSERS = { 828 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 829 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 830 
"CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 831 "CHARACTER SET": lambda self: self.expression( 832 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 833 ), 834 "CHECK": lambda self: self.expression( 835 exp.CheckColumnConstraint, 836 this=self._parse_wrapped(self._parse_conjunction), 837 enforced=self._match_text_seq("ENFORCED"), 838 ), 839 "COLLATE": lambda self: self.expression( 840 exp.CollateColumnConstraint, this=self._parse_var() 841 ), 842 "COMMENT": lambda self: self.expression( 843 exp.CommentColumnConstraint, this=self._parse_string() 844 ), 845 "COMPRESS": lambda self: self._parse_compress(), 846 "CLUSTERED": lambda self: self.expression( 847 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 848 ), 849 "NONCLUSTERED": lambda self: self.expression( 850 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 851 ), 852 "DEFAULT": lambda self: self.expression( 853 exp.DefaultColumnConstraint, this=self._parse_bitwise() 854 ), 855 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 856 "EXCLUDE": lambda self: self.expression( 857 exp.ExcludeColumnConstraint, this=self._parse_index_params() 858 ), 859 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 860 "FORMAT": lambda self: self.expression( 861 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 862 ), 863 "GENERATED": lambda self: self._parse_generated_as_identity(), 864 "IDENTITY": lambda self: self._parse_auto_increment(), 865 "INLINE": lambda self: self._parse_inline(), 866 "LIKE": lambda self: self._parse_create_like(), 867 "NOT": lambda self: self._parse_not_constraint(), 868 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 869 "ON": lambda self: ( 870 self._match(TokenType.UPDATE) 871 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 872 ) 873 or self.expression(exp.OnProperty, this=self._parse_id_var()), 874 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 875 "PERIOD": lambda self: self._parse_period_for_system_time(), 876 "PRIMARY KEY": lambda self: self._parse_primary_key(), 877 "REFERENCES": lambda self: self._parse_references(match=False), 878 "TITLE": lambda self: self.expression( 879 exp.TitleColumnConstraint, this=self._parse_var_or_string() 880 ), 881 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 882 "UNIQUE": lambda self: self._parse_unique(), 883 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 884 "WITH": lambda self: self.expression( 885 exp.Properties, expressions=self._parse_wrapped_properties() 886 ), 887 } 888 889 ALTER_PARSERS = { 890 "ADD": lambda self: self._parse_alter_table_add(), 891 "ALTER": lambda self: self._parse_alter_table_alter(), 892 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 893 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 894 "DROP": lambda self: self._parse_alter_table_drop(), 895 "RENAME": lambda self: self._parse_alter_table_rename(), 896 } 897 898 SCHEMA_UNNAMED_CONSTRAINTS = { 899 "CHECK", 900 "EXCLUDE", 901 "FOREIGN KEY", 902 "LIKE", 903 "PERIOD", 904 "PRIMARY KEY", 905 "UNIQUE", 906 } 907 908 NO_PAREN_FUNCTION_PARSERS = { 909 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 910 "CASE": lambda self: self._parse_case(), 911 "IF": lambda self: 
self._parse_if(), 912 "NEXT": lambda self: self._parse_next_value_for(), 913 } 914 915 INVALID_FUNC_NAME_TOKENS = { 916 TokenType.IDENTIFIER, 917 TokenType.STRING, 918 } 919 920 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 921 922 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 923 924 FUNCTION_PARSERS = { 925 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 926 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 927 "DECODE": lambda self: self._parse_decode(), 928 "EXTRACT": lambda self: self._parse_extract(), 929 "JSON_OBJECT": lambda self: self._parse_json_object(), 930 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 931 "JSON_TABLE": lambda self: self._parse_json_table(), 932 "MATCH": lambda self: self._parse_match_against(), 933 "OPENJSON": lambda self: self._parse_open_json(), 934 "POSITION": lambda self: self._parse_position(), 935 "PREDICT": lambda self: self._parse_predict(), 936 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 937 "STRING_AGG": lambda self: self._parse_string_agg(), 938 "SUBSTRING": lambda self: self._parse_substring(), 939 "TRIM": lambda self: self._parse_trim(), 940 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 941 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 942 } 943 944 QUERY_MODIFIER_PARSERS = { 945 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 946 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 947 TokenType.WHERE: lambda self: ("where", self._parse_where()), 948 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 949 TokenType.HAVING: lambda self: ("having", self._parse_having()), 950 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 951 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 952 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 953 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 954 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 955 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 956 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 957 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 958 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 959 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 960 TokenType.CLUSTER_BY: lambda self: ( 961 "cluster", 962 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 963 ), 964 TokenType.DISTRIBUTE_BY: lambda self: ( 965 "distribute", 966 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 967 ), 968 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 969 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 970 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 971 } 972 973 SET_PARSERS = { 974 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 975 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 976 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 977 "TRANSACTION": lambda self: self._parse_set_transaction(), 978 } 979 980 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 981 982 TYPE_LITERAL_PARSERS = { 983 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 984 } 985 986 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 987 988 
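    # A minimal sketch (illustrative only; "SYSDATE" is just an example name) of how a
    # dialect-specific subclass customizes parsing by overriding these dispatch tables:
    #
    #     class MyParser(Parser):
    #         NO_PAREN_FUNCTION_PARSERS = {
    #             **Parser.NO_PAREN_FUNCTION_PARSERS,
    #             "SYSDATE": lambda self: self.expression(exp.CurrentTimestamp),
    #         }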
    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether the SET command needs a delimiter (e.g.
"=") for assignments 1081 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1082 1083 # Whether the TRIM function expects the characters to trim as its first argument 1084 TRIM_PATTERN_FIRST = False 1085 1086 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1087 STRING_ALIASES = False 1088 1089 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1090 MODIFIERS_ATTACHED_TO_UNION = True 1091 UNION_MODIFIERS = {"order", "limit", "offset"} 1092 1093 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1094 NO_PAREN_IF_COMMANDS = True 1095 1096 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1097 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1098 1099 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1100 # If this is True and '(' is not found, the keyword will be treated as an identifier 1101 VALUES_FOLLOWED_BY_PAREN = True 1102 1103 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1104 SUPPORTS_IMPLICIT_UNNEST = False 1105 1106 __slots__ = ( 1107 "error_level", 1108 "error_message_context", 1109 "max_errors", 1110 "dialect", 1111 "sql", 1112 "errors", 1113 "_tokens", 1114 "_index", 1115 "_curr", 1116 "_next", 1117 "_prev", 1118 "_prev_comments", 1119 ) 1120 1121 # Autofilled 1122 SHOW_TRIE: t.Dict = {} 1123 SET_TRIE: t.Dict = {} 1124 1125 def __init__( 1126 self, 1127 error_level: t.Optional[ErrorLevel] = None, 1128 error_message_context: int = 100, 1129 max_errors: int = 3, 1130 dialect: DialectType = None, 1131 ): 1132 from sqlglot.dialects import Dialect 1133 1134 self.error_level = error_level or ErrorLevel.IMMEDIATE 1135 self.error_message_context = error_message_context 1136 self.max_errors = max_errors 1137 self.dialect = Dialect.get_or_raise(dialect) 1138 self.reset() 1139 1140 def reset(self): 1141 self.sql = "" 1142 self.errors = [] 1143 self._tokens = [] 1144 self._index = 0 1145 self._curr = None 1146 self._next = None 1147 self._prev = None 1148 self._prev_comments = None 1149 1150 def parse( 1151 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1152 ) -> t.List[t.Optional[exp.Expression]]: 1153 """ 1154 Parses a list of tokens and returns a list of syntax trees, one tree 1155 per parsed SQL statement. 1156 1157 Args: 1158 raw_tokens: The list of tokens. 1159 sql: The original SQL string, used to produce helpful debug messages. 1160 1161 Returns: 1162 The list of the produced syntax trees. 1163 """ 1164 return self._parse( 1165 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1166 ) 1167 1168 def parse_into( 1169 self, 1170 expression_types: exp.IntoType, 1171 raw_tokens: t.List[Token], 1172 sql: t.Optional[str] = None, 1173 ) -> t.List[t.Optional[exp.Expression]]: 1174 """ 1175 Parses a list of tokens into a given Expression type. If a collection of Expression 1176 types is given instead, this method will try to parse the token list into each one 1177 of them, stopping at the first for which the parsing succeeds. 1178 1179 Args: 1180 expression_types: The expression type(s) to try and parse the token list into. 1181 raw_tokens: The list of tokens. 1182 sql: The original SQL string, used to produce helpful debug messages. 1183 1184 Returns: 1185 The target Expression. 
1186 """ 1187 errors = [] 1188 for expression_type in ensure_list(expression_types): 1189 parser = self.EXPRESSION_PARSERS.get(expression_type) 1190 if not parser: 1191 raise TypeError(f"No parser registered for {expression_type}") 1192 1193 try: 1194 return self._parse(parser, raw_tokens, sql) 1195 except ParseError as e: 1196 e.errors[0]["into_expression"] = expression_type 1197 errors.append(e) 1198 1199 raise ParseError( 1200 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1201 errors=merge_errors(errors), 1202 ) from errors[-1] 1203 1204 def _parse( 1205 self, 1206 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1207 raw_tokens: t.List[Token], 1208 sql: t.Optional[str] = None, 1209 ) -> t.List[t.Optional[exp.Expression]]: 1210 self.reset() 1211 self.sql = sql or "" 1212 1213 total = len(raw_tokens) 1214 chunks: t.List[t.List[Token]] = [[]] 1215 1216 for i, token in enumerate(raw_tokens): 1217 if token.token_type == TokenType.SEMICOLON: 1218 if i < total - 1: 1219 chunks.append([]) 1220 else: 1221 chunks[-1].append(token) 1222 1223 expressions = [] 1224 1225 for tokens in chunks: 1226 self._index = -1 1227 self._tokens = tokens 1228 self._advance() 1229 1230 expressions.append(parse_method(self)) 1231 1232 if self._index < len(self._tokens): 1233 self.raise_error("Invalid expression / Unexpected token") 1234 1235 self.check_errors() 1236 1237 return expressions 1238 1239 def check_errors(self) -> None: 1240 """Logs or raises any found errors, depending on the chosen error level setting.""" 1241 if self.error_level == ErrorLevel.WARN: 1242 for error in self.errors: 1243 logger.error(str(error)) 1244 elif self.error_level == ErrorLevel.RAISE and self.errors: 1245 raise ParseError( 1246 concat_messages(self.errors, self.max_errors), 1247 errors=merge_errors(self.errors), 1248 ) 1249 1250 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1251 """ 1252 Appends an error in the list of recorded errors or raises it, depending on the chosen 1253 error level setting. 1254 """ 1255 token = token or self._curr or self._prev or Token.string("") 1256 start = token.start 1257 end = token.end + 1 1258 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1259 highlight = self.sql[start:end] 1260 end_context = self.sql[end : end + self.error_message_context] 1261 1262 error = ParseError.new( 1263 f"{message}. Line {token.line}, Col: {token.col}.\n" 1264 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1265 description=message, 1266 line=token.line, 1267 col=token.col, 1268 start_context=start_context, 1269 highlight=highlight, 1270 end_context=end_context, 1271 ) 1272 1273 if self.error_level == ErrorLevel.IMMEDIATE: 1274 raise error 1275 1276 self.errors.append(error) 1277 1278 def expression( 1279 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1280 ) -> E: 1281 """ 1282 Creates a new, validated Expression. 1283 1284 Args: 1285 exp_class: The expression class to instantiate. 1286 comments: An optional list of comments to attach to the expression. 1287 kwargs: The arguments to set for the expression along with their respective values. 1288 1289 Returns: 1290 The target expression. 
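        Example (a minimal sketch, where `parser` is a Parser instance):
            not_x = parser.expression(exp.Not, this=exp.column("x"))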
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(self) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

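        # DROP must be followed by a creatable kind (TABLE, VIEW, ...); otherwise the
        # statement is re-parsed as a generic exp.Command below.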
kind = self._match_set(self.CREATABLES) and self._prev.text 1468 if not kind: 1469 return self._parse_as_command(start) 1470 1471 if_exists = exists or self._parse_exists() 1472 table = self._parse_table_parts( 1473 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1474 ) 1475 1476 if self._match(TokenType.L_PAREN, advance=False): 1477 expressions = self._parse_wrapped_csv(self._parse_types) 1478 else: 1479 expressions = None 1480 1481 return self.expression( 1482 exp.Drop, 1483 comments=start.comments, 1484 exists=if_exists, 1485 this=table, 1486 expressions=expressions, 1487 kind=kind, 1488 temporary=temporary, 1489 materialized=materialized, 1490 cascade=self._match_text_seq("CASCADE"), 1491 constraints=self._match_text_seq("CONSTRAINTS"), 1492 purge=self._match_text_seq("PURGE"), 1493 ) 1494 1495 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1496 return ( 1497 self._match_text_seq("IF") 1498 and (not not_ or self._match(TokenType.NOT)) 1499 and self._match(TokenType.EXISTS) 1500 ) 1501 1502 def _parse_create(self) -> exp.Create | exp.Command: 1503 # Note: this can't be None because we've matched a statement parser 1504 start = self._prev 1505 comments = self._prev_comments 1506 1507 replace = ( 1508 start.token_type == TokenType.REPLACE 1509 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1510 or self._match_pair(TokenType.OR, TokenType.ALTER) 1511 ) 1512 1513 unique = self._match(TokenType.UNIQUE) 1514 1515 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1516 self._advance() 1517 1518 properties = None 1519 create_token = self._match_set(self.CREATABLES) and self._prev 1520 1521 if not create_token: 1522 # exp.Properties.Location.POST_CREATE 1523 properties = self._parse_properties() 1524 create_token = self._match_set(self.CREATABLES) and self._prev 1525 1526 if not properties or not create_token: 1527 return self._parse_as_command(start) 1528 1529 exists = self._parse_exists(not_=True) 1530 this = None 1531 expression: t.Optional[exp.Expression] = None 1532 indexes = None 1533 no_schema_binding = None 1534 begin = None 1535 end = None 1536 clone = None 1537 1538 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1539 nonlocal properties 1540 if properties and temp_props: 1541 properties.expressions.extend(temp_props.expressions) 1542 elif temp_props: 1543 properties = temp_props 1544 1545 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1546 this = self._parse_user_defined_function(kind=create_token.token_type) 1547 1548 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1549 extend_props(self._parse_properties()) 1550 1551 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1552 1553 if not expression: 1554 if self._match(TokenType.COMMAND): 1555 expression = self._parse_as_command(self._prev) 1556 else: 1557 begin = self._match(TokenType.BEGIN) 1558 return_ = self._match_text_seq("RETURN") 1559 1560 if self._match(TokenType.STRING, advance=False): 1561 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1562 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1563 expression = self._parse_string() 1564 extend_props(self._parse_properties()) 1565 else: 1566 expression = self._parse_statement() 1567 1568 end = self._match_text_seq("END") 1569 1570 if return_: 1571 expression = self.expression(exp.Return, this=expression) 1572 elif 
create_token.token_type == TokenType.INDEX: 1573 this = self._parse_index(index=self._parse_id_var()) 1574 elif create_token.token_type in self.DB_CREATABLES: 1575 table_parts = self._parse_table_parts( 1576 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1577 ) 1578 1579 # exp.Properties.Location.POST_NAME 1580 self._match(TokenType.COMMA) 1581 extend_props(self._parse_properties(before=True)) 1582 1583 this = self._parse_schema(this=table_parts) 1584 1585 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1586 extend_props(self._parse_properties()) 1587 1588 self._match(TokenType.ALIAS) 1589 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1590 # exp.Properties.Location.POST_ALIAS 1591 extend_props(self._parse_properties()) 1592 1593 if create_token.token_type == TokenType.SEQUENCE: 1594 expression = self._parse_types() 1595 extend_props(self._parse_properties()) 1596 else: 1597 expression = self._parse_ddl_select() 1598 1599 if create_token.token_type == TokenType.TABLE: 1600 # exp.Properties.Location.POST_EXPRESSION 1601 extend_props(self._parse_properties()) 1602 1603 indexes = [] 1604 while True: 1605 index = self._parse_index() 1606 1607 # exp.Properties.Location.POST_INDEX 1608 extend_props(self._parse_properties()) 1609 1610 if not index: 1611 break 1612 else: 1613 self._match(TokenType.COMMA) 1614 indexes.append(index) 1615 elif create_token.token_type == TokenType.VIEW: 1616 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1617 no_schema_binding = True 1618 1619 shallow = self._match_text_seq("SHALLOW") 1620 1621 if self._match_texts(self.CLONE_KEYWORDS): 1622 copy = self._prev.text.lower() == "copy" 1623 clone = self.expression( 1624 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1625 ) 1626 1627 if self._curr: 1628 return self._parse_as_command(start) 1629 1630 return self.expression( 1631 exp.Create, 1632 comments=comments, 1633 this=this, 1634 kind=create_token.text.upper(), 1635 replace=replace, 1636 unique=unique, 1637 expression=expression, 1638 exists=exists, 1639 properties=properties, 1640 indexes=indexes, 1641 no_schema_binding=no_schema_binding, 1642 begin=begin, 1643 end=end, 1644 clone=clone, 1645 ) 1646 1647 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1648 seq = exp.SequenceProperties() 1649 1650 options = [] 1651 index = self._index 1652 1653 while self._curr: 1654 if self._match_text_seq("INCREMENT"): 1655 self._match_text_seq("BY") 1656 self._match_text_seq("=") 1657 seq.set("increment", self._parse_term()) 1658 elif self._match_text_seq("MINVALUE"): 1659 seq.set("minvalue", self._parse_term()) 1660 elif self._match_text_seq("MAXVALUE"): 1661 seq.set("maxvalue", self._parse_term()) 1662 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1663 self._match_text_seq("=") 1664 seq.set("start", self._parse_term()) 1665 elif self._match_text_seq("CACHE"): 1666 # T-SQL allows empty CACHE which is initialized dynamically 1667 seq.set("cache", self._parse_number() or True) 1668 elif self._match_text_seq("OWNED", "BY"): 1669 # "OWNED BY NONE" is the default 1670 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1671 else: 1672 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1673 if opt: 1674 options.append(opt) 1675 else: 1676 break 1677 1678 seq.set("options", options if options else None) 1679 return None if self._index == index else seq 1680 1681 def 
_parse_property_before(self) -> t.Optional[exp.Expression]: 1682 # only used for teradata currently 1683 self._match(TokenType.COMMA) 1684 1685 kwargs = { 1686 "no": self._match_text_seq("NO"), 1687 "dual": self._match_text_seq("DUAL"), 1688 "before": self._match_text_seq("BEFORE"), 1689 "default": self._match_text_seq("DEFAULT"), 1690 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1691 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1692 "after": self._match_text_seq("AFTER"), 1693 "minimum": self._match_texts(("MIN", "MINIMUM")), 1694 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1695 } 1696 1697 if self._match_texts(self.PROPERTY_PARSERS): 1698 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1699 try: 1700 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1701 except TypeError: 1702 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1703 1704 return None 1705 1706 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1707 return self._parse_wrapped_csv(self._parse_property) 1708 1709 def _parse_property(self) -> t.Optional[exp.Expression]: 1710 if self._match_texts(self.PROPERTY_PARSERS): 1711 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1712 1713 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1714 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1715 1716 if self._match_text_seq("COMPOUND", "SORTKEY"): 1717 return self._parse_sortkey(compound=True) 1718 1719 if self._match_text_seq("SQL", "SECURITY"): 1720 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1721 1722 index = self._index 1723 key = self._parse_column() 1724 1725 if not self._match(TokenType.EQ): 1726 self._retreat(index) 1727 return self._parse_sequence_properties() 1728 1729 return self.expression( 1730 exp.Property, 1731 this=key.to_dot() if isinstance(key, exp.Column) else key, 1732 value=self._parse_column() or self._parse_var(any_token=True), 1733 ) 1734 1735 def _parse_stored(self) -> exp.FileFormatProperty: 1736 self._match(TokenType.ALIAS) 1737 1738 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1739 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1740 1741 return self.expression( 1742 exp.FileFormatProperty, 1743 this=( 1744 self.expression( 1745 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1746 ) 1747 if input_format or output_format 1748 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1749 ), 1750 ) 1751 1752 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1753 self._match(TokenType.EQ) 1754 self._match(TokenType.ALIAS) 1755 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1756 1757 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1758 properties = [] 1759 while True: 1760 if before: 1761 prop = self._parse_property_before() 1762 else: 1763 prop = self._parse_property() 1764 if not prop: 1765 break 1766 for p in ensure_list(prop): 1767 properties.append(p) 1768 1769 if properties: 1770 return self.expression(exp.Properties, expressions=properties) 1771 1772 return None 1773 1774 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1775 return self.expression( 1776 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1777 ) 1778 1779 def _parse_volatile_property(self) 
-> exp.VolatileProperty | exp.StabilityProperty: 1780 if self._index >= 2: 1781 pre_volatile_token = self._tokens[self._index - 2] 1782 else: 1783 pre_volatile_token = None 1784 1785 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1786 return exp.VolatileProperty() 1787 1788 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1789 1790 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1791 self._match_pair(TokenType.EQ, TokenType.ON) 1792 1793 prop = self.expression(exp.WithSystemVersioningProperty) 1794 if self._match(TokenType.L_PAREN): 1795 self._match_text_seq("HISTORY_TABLE", "=") 1796 prop.set("this", self._parse_table_parts()) 1797 1798 if self._match(TokenType.COMMA): 1799 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1800 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1801 1802 self._match_r_paren() 1803 1804 return prop 1805 1806 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1807 if self._match(TokenType.L_PAREN, advance=False): 1808 return self._parse_wrapped_properties() 1809 1810 if self._match_text_seq("JOURNAL"): 1811 return self._parse_withjournaltable() 1812 1813 if self._match_texts(self.VIEW_ATTRIBUTES): 1814 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1815 1816 if self._match_text_seq("DATA"): 1817 return self._parse_withdata(no=False) 1818 elif self._match_text_seq("NO", "DATA"): 1819 return self._parse_withdata(no=True) 1820 1821 if not self._next: 1822 return None 1823 1824 return self._parse_withisolatedloading() 1825 1826 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1827 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1828 self._match(TokenType.EQ) 1829 1830 user = self._parse_id_var() 1831 self._match(TokenType.PARAMETER) 1832 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1833 1834 if not user or not host: 1835 return None 1836 1837 return exp.DefinerProperty(this=f"{user}@{host}") 1838 1839 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1840 self._match(TokenType.TABLE) 1841 self._match(TokenType.EQ) 1842 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1843 1844 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1845 return self.expression(exp.LogProperty, no=no) 1846 1847 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1848 return self.expression(exp.JournalProperty, **kwargs) 1849 1850 def _parse_checksum(self) -> exp.ChecksumProperty: 1851 self._match(TokenType.EQ) 1852 1853 on = None 1854 if self._match(TokenType.ON): 1855 on = True 1856 elif self._match_text_seq("OFF"): 1857 on = False 1858 1859 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1860 1861 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 1862 return self.expression( 1863 exp.Cluster, 1864 expressions=( 1865 self._parse_wrapped_csv(self._parse_ordered) 1866 if wrapped 1867 else self._parse_csv(self._parse_ordered) 1868 ), 1869 ) 1870 1871 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1872 self._match_text_seq("BY") 1873 1874 self._match_l_paren() 1875 expressions = self._parse_csv(self._parse_column) 1876 self._match_r_paren() 1877 1878 if self._match_text_seq("SORTED", "BY"): 1879 self._match_l_paren() 1880 sorted_by = self._parse_csv(self._parse_ordered) 1881 self._match_r_paren() 1882 
else: 1883 sorted_by = None 1884 1885 self._match(TokenType.INTO) 1886 buckets = self._parse_number() 1887 self._match_text_seq("BUCKETS") 1888 1889 return self.expression( 1890 exp.ClusteredByProperty, 1891 expressions=expressions, 1892 sorted_by=sorted_by, 1893 buckets=buckets, 1894 ) 1895 1896 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1897 if not self._match_text_seq("GRANTS"): 1898 self._retreat(self._index - 1) 1899 return None 1900 1901 return self.expression(exp.CopyGrantsProperty) 1902 1903 def _parse_freespace(self) -> exp.FreespaceProperty: 1904 self._match(TokenType.EQ) 1905 return self.expression( 1906 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1907 ) 1908 1909 def _parse_mergeblockratio( 1910 self, no: bool = False, default: bool = False 1911 ) -> exp.MergeBlockRatioProperty: 1912 if self._match(TokenType.EQ): 1913 return self.expression( 1914 exp.MergeBlockRatioProperty, 1915 this=self._parse_number(), 1916 percent=self._match(TokenType.PERCENT), 1917 ) 1918 1919 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1920 1921 def _parse_datablocksize( 1922 self, 1923 default: t.Optional[bool] = None, 1924 minimum: t.Optional[bool] = None, 1925 maximum: t.Optional[bool] = None, 1926 ) -> exp.DataBlocksizeProperty: 1927 self._match(TokenType.EQ) 1928 size = self._parse_number() 1929 1930 units = None 1931 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1932 units = self._prev.text 1933 1934 return self.expression( 1935 exp.DataBlocksizeProperty, 1936 size=size, 1937 units=units, 1938 default=default, 1939 minimum=minimum, 1940 maximum=maximum, 1941 ) 1942 1943 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1944 self._match(TokenType.EQ) 1945 always = self._match_text_seq("ALWAYS") 1946 manual = self._match_text_seq("MANUAL") 1947 never = self._match_text_seq("NEVER") 1948 default = self._match_text_seq("DEFAULT") 1949 1950 autotemp = None 1951 if self._match_text_seq("AUTOTEMP"): 1952 autotemp = self._parse_schema() 1953 1954 return self.expression( 1955 exp.BlockCompressionProperty, 1956 always=always, 1957 manual=manual, 1958 never=never, 1959 default=default, 1960 autotemp=autotemp, 1961 ) 1962 1963 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 1964 index = self._index 1965 no = self._match_text_seq("NO") 1966 concurrent = self._match_text_seq("CONCURRENT") 1967 1968 if not self._match_text_seq("ISOLATED", "LOADING"): 1969 self._retreat(index) 1970 return None 1971 1972 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 1973 return self.expression( 1974 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 1975 ) 1976 1977 def _parse_locking(self) -> exp.LockingProperty: 1978 if self._match(TokenType.TABLE): 1979 kind = "TABLE" 1980 elif self._match(TokenType.VIEW): 1981 kind = "VIEW" 1982 elif self._match(TokenType.ROW): 1983 kind = "ROW" 1984 elif self._match_text_seq("DATABASE"): 1985 kind = "DATABASE" 1986 else: 1987 kind = None 1988 1989 if kind in ("DATABASE", "TABLE", "VIEW"): 1990 this = self._parse_table_parts() 1991 else: 1992 this = None 1993 1994 if self._match(TokenType.FOR): 1995 for_or_in = "FOR" 1996 elif self._match(TokenType.IN): 1997 for_or_in = "IN" 1998 else: 1999 for_or_in = None 2000 2001 if self._match_text_seq("ACCESS"): 2002 lock_type = "ACCESS" 2003 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2004 lock_type = "EXCLUSIVE" 2005 elif 
self._match_text_seq("SHARE"): 2006 lock_type = "SHARE" 2007 elif self._match_text_seq("READ"): 2008 lock_type = "READ" 2009 elif self._match_text_seq("WRITE"): 2010 lock_type = "WRITE" 2011 elif self._match_text_seq("CHECKSUM"): 2012 lock_type = "CHECKSUM" 2013 else: 2014 lock_type = None 2015 2016 override = self._match_text_seq("OVERRIDE") 2017 2018 return self.expression( 2019 exp.LockingProperty, 2020 this=this, 2021 kind=kind, 2022 for_or_in=for_or_in, 2023 lock_type=lock_type, 2024 override=override, 2025 ) 2026 2027 def _parse_partition_by(self) -> t.List[exp.Expression]: 2028 if self._match(TokenType.PARTITION_BY): 2029 return self._parse_csv(self._parse_conjunction) 2030 return [] 2031 2032 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2033 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2034 if self._match_text_seq("MINVALUE"): 2035 return exp.var("MINVALUE") 2036 if self._match_text_seq("MAXVALUE"): 2037 return exp.var("MAXVALUE") 2038 return self._parse_bitwise() 2039 2040 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2041 expression = None 2042 from_expressions = None 2043 to_expressions = None 2044 2045 if self._match(TokenType.IN): 2046 this = self._parse_wrapped_csv(self._parse_bitwise) 2047 elif self._match(TokenType.FROM): 2048 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2049 self._match_text_seq("TO") 2050 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2051 elif self._match_text_seq("WITH", "(", "MODULUS"): 2052 this = self._parse_number() 2053 self._match_text_seq(",", "REMAINDER") 2054 expression = self._parse_number() 2055 self._match_r_paren() 2056 else: 2057 self.raise_error("Failed to parse partition bound spec.") 2058 2059 return self.expression( 2060 exp.PartitionBoundSpec, 2061 this=this, 2062 expression=expression, 2063 from_expressions=from_expressions, 2064 to_expressions=to_expressions, 2065 ) 2066 2067 # https://www.postgresql.org/docs/current/sql-createtable.html 2068 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2069 if not self._match_text_seq("OF"): 2070 self._retreat(self._index - 1) 2071 return None 2072 2073 this = self._parse_table(schema=True) 2074 2075 if self._match(TokenType.DEFAULT): 2076 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2077 elif self._match_text_seq("FOR", "VALUES"): 2078 expression = self._parse_partition_bound_spec() 2079 else: 2080 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2081 2082 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2083 2084 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2085 self._match(TokenType.EQ) 2086 return self.expression( 2087 exp.PartitionedByProperty, 2088 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2089 ) 2090 2091 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2092 if self._match_text_seq("AND", "STATISTICS"): 2093 statistics = True 2094 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2095 statistics = False 2096 else: 2097 statistics = None 2098 2099 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2100 2101 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2102 if self._match_text_seq("SQL"): 2103 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2104 return None 2105 2106 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 
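        # SQL-standard routine characteristic: MODIFIES SQL DATA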
2107 if self._match_text_seq("SQL", "DATA"): 2108 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2109 return None 2110 2111 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2112 if self._match_text_seq("PRIMARY", "INDEX"): 2113 return exp.NoPrimaryIndexProperty() 2114 if self._match_text_seq("SQL"): 2115 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2116 return None 2117 2118 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2119 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2120 return exp.OnCommitProperty() 2121 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2122 return exp.OnCommitProperty(delete=True) 2123 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2124 2125 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2126 if self._match_text_seq("SQL", "DATA"): 2127 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2128 return None 2129 2130 def _parse_distkey(self) -> exp.DistKeyProperty: 2131 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2132 2133 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2134 table = self._parse_table(schema=True) 2135 2136 options = [] 2137 while self._match_texts(("INCLUDING", "EXCLUDING")): 2138 this = self._prev.text.upper() 2139 2140 id_var = self._parse_id_var() 2141 if not id_var: 2142 return None 2143 2144 options.append( 2145 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2146 ) 2147 2148 return self.expression(exp.LikeProperty, this=table, expressions=options) 2149 2150 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2151 return self.expression( 2152 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2153 ) 2154 2155 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2156 self._match(TokenType.EQ) 2157 return self.expression( 2158 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2159 ) 2160 2161 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2162 self._match_text_seq("WITH", "CONNECTION") 2163 return self.expression( 2164 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2165 ) 2166 2167 def _parse_returns(self) -> exp.ReturnsProperty: 2168 value: t.Optional[exp.Expression] 2169 is_table = self._match(TokenType.TABLE) 2170 2171 if is_table: 2172 if self._match(TokenType.LT): 2173 value = self.expression( 2174 exp.Schema, 2175 this="TABLE", 2176 expressions=self._parse_csv(self._parse_struct_types), 2177 ) 2178 if not self._match(TokenType.GT): 2179 self.raise_error("Expecting >") 2180 else: 2181 value = self._parse_schema(exp.var("TABLE")) 2182 else: 2183 value = self._parse_types() 2184 2185 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 2186 2187 def _parse_describe(self) -> exp.Describe: 2188 kind = self._match_set(self.CREATABLES) and self._prev.text 2189 extended = self._match_text_seq("EXTENDED") 2190 this = self._parse_table(schema=True) 2191 properties = self._parse_properties() 2192 expressions = properties.expressions if properties else None 2193 return self.expression( 2194 exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions 2195 ) 2196 2197 def _parse_insert(self) -> exp.Insert: 2198 comments = ensure_list(self._prev_comments) 2199 hint = self._parse_hint() 2200 overwrite = 
self._match(TokenType.OVERWRITE) 2201 ignore = self._match(TokenType.IGNORE) 2202 local = self._match_text_seq("LOCAL") 2203 alternative = None 2204 is_function = None 2205 2206 if self._match_text_seq("DIRECTORY"): 2207 this: t.Optional[exp.Expression] = self.expression( 2208 exp.Directory, 2209 this=self._parse_var_or_string(), 2210 local=local, 2211 row_format=self._parse_row_format(match_row=True), 2212 ) 2213 else: 2214 if self._match(TokenType.OR): 2215 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2216 2217 self._match(TokenType.INTO) 2218 comments += ensure_list(self._prev_comments) 2219 self._match(TokenType.TABLE) 2220 is_function = self._match(TokenType.FUNCTION) 2221 2222 this = self._parse_table(schema=True) if not is_function else self._parse_function() 2223 2224 returning = self._parse_returning() 2225 2226 return self.expression( 2227 exp.Insert, 2228 comments=comments, 2229 hint=hint, 2230 is_function=is_function, 2231 this=this, 2232 by_name=self._match_text_seq("BY", "NAME"), 2233 exists=self._parse_exists(), 2234 partition=self._parse_partition(), 2235 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2236 and self._parse_conjunction(), 2237 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2238 conflict=self._parse_on_conflict(), 2239 returning=returning or self._parse_returning(), 2240 overwrite=overwrite, 2241 alternative=alternative, 2242 ignore=ignore, 2243 ) 2244 2245 def _parse_kill(self) -> exp.Kill: 2246 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2247 2248 return self.expression( 2249 exp.Kill, 2250 this=self._parse_primary(), 2251 kind=kind, 2252 ) 2253 2254 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2255 conflict = self._match_text_seq("ON", "CONFLICT") 2256 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2257 2258 if not conflict and not duplicate: 2259 return None 2260 2261 conflict_keys = None 2262 constraint = None 2263 2264 if conflict: 2265 if self._match_text_seq("ON", "CONSTRAINT"): 2266 constraint = self._parse_id_var() 2267 elif self._match(TokenType.L_PAREN): 2268 conflict_keys = self._parse_csv(self._parse_id_var) 2269 self._match_r_paren() 2270 2271 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2272 if self._prev.token_type == TokenType.UPDATE: 2273 self._match(TokenType.SET) 2274 expressions = self._parse_csv(self._parse_equality) 2275 else: 2276 expressions = None 2277 2278 return self.expression( 2279 exp.OnConflict, 2280 duplicate=duplicate, 2281 expressions=expressions, 2282 action=action, 2283 conflict_keys=conflict_keys, 2284 constraint=constraint, 2285 ) 2286 2287 def _parse_returning(self) -> t.Optional[exp.Returning]: 2288 if not self._match(TokenType.RETURNING): 2289 return None 2290 return self.expression( 2291 exp.Returning, 2292 expressions=self._parse_csv(self._parse_expression), 2293 into=self._match(TokenType.INTO) and self._parse_table_part(), 2294 ) 2295 2296 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2297 if not self._match(TokenType.FORMAT): 2298 return None 2299 return self._parse_row_format() 2300 2301 def _parse_row_format( 2302 self, match_row: bool = False 2303 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2304 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2305 return None 2306 2307 if self._match_text_seq("SERDE"): 2308 this = self._parse_string() 2309 2310 
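            # Hive-style: ROW FORMAT SERDE '<class>' [WITH SERDEPROPERTIES (...)]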
serde_properties = None 2311 if self._match(TokenType.SERDE_PROPERTIES): 2312 serde_properties = self.expression( 2313 exp.SerdeProperties, expressions=self._parse_wrapped_properties() 2314 ) 2315 2316 return self.expression( 2317 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2318 ) 2319 2320 self._match_text_seq("DELIMITED") 2321 2322 kwargs = {} 2323 2324 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2325 kwargs["fields"] = self._parse_string() 2326 if self._match_text_seq("ESCAPED", "BY"): 2327 kwargs["escaped"] = self._parse_string() 2328 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2329 kwargs["collection_items"] = self._parse_string() 2330 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2331 kwargs["map_keys"] = self._parse_string() 2332 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2333 kwargs["lines"] = self._parse_string() 2334 if self._match_text_seq("NULL", "DEFINED", "AS"): 2335 kwargs["null"] = self._parse_string() 2336 2337 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2338 2339 def _parse_load(self) -> exp.LoadData | exp.Command: 2340 if self._match_text_seq("DATA"): 2341 local = self._match_text_seq("LOCAL") 2342 self._match_text_seq("INPATH") 2343 inpath = self._parse_string() 2344 overwrite = self._match(TokenType.OVERWRITE) 2345 self._match_pair(TokenType.INTO, TokenType.TABLE) 2346 2347 return self.expression( 2348 exp.LoadData, 2349 this=self._parse_table(schema=True), 2350 local=local, 2351 overwrite=overwrite, 2352 inpath=inpath, 2353 partition=self._parse_partition(), 2354 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2355 serde=self._match_text_seq("SERDE") and self._parse_string(), 2356 ) 2357 return self._parse_as_command(self._prev) 2358 2359 def _parse_delete(self) -> exp.Delete: 2360 # This handles MySQL's "Multiple-Table Syntax" 2361 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2362 tables = None 2363 comments = self._prev_comments 2364 if not self._match(TokenType.FROM, advance=False): 2365 tables = self._parse_csv(self._parse_table) or None 2366 2367 returning = self._parse_returning() 2368 2369 return self.expression( 2370 exp.Delete, 2371 comments=comments, 2372 tables=tables, 2373 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2374 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2375 where=self._parse_where(), 2376 returning=returning or self._parse_returning(), 2377 limit=self._parse_limit(), 2378 ) 2379 2380 def _parse_update(self) -> exp.Update: 2381 comments = self._prev_comments 2382 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2383 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2384 returning = self._parse_returning() 2385 return self.expression( 2386 exp.Update, 2387 comments=comments, 2388 **{ # type: ignore 2389 "this": this, 2390 "expressions": expressions, 2391 "from": self._parse_from(joins=True), 2392 "where": self._parse_where(), 2393 "returning": returning or self._parse_returning(), 2394 "order": self._parse_order(), 2395 "limit": self._parse_limit(), 2396 }, 2397 ) 2398 2399 def _parse_uncache(self) -> exp.Uncache: 2400 if not self._match(TokenType.TABLE): 2401 self.raise_error("Expecting TABLE after UNCACHE") 2402 2403 return self.expression( 2404 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2405 ) 2406 2407 def _parse_cache(self) -> exp.Cache: 
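        # Spark-style: CACHE [LAZY] TABLE t [OPTIONS ('k' = 'v')] [[AS] SELECT ...]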
2408 lazy = self._match_text_seq("LAZY") 2409 self._match(TokenType.TABLE) 2410 table = self._parse_table(schema=True) 2411 2412 options = [] 2413 if self._match_text_seq("OPTIONS"): 2414 self._match_l_paren() 2415 k = self._parse_string() 2416 self._match(TokenType.EQ) 2417 v = self._parse_string() 2418 options = [k, v] 2419 self._match_r_paren() 2420 2421 self._match(TokenType.ALIAS) 2422 return self.expression( 2423 exp.Cache, 2424 this=table, 2425 lazy=lazy, 2426 options=options, 2427 expression=self._parse_select(nested=True), 2428 ) 2429 2430 def _parse_partition(self) -> t.Optional[exp.Partition]: 2431 if not self._match(TokenType.PARTITION): 2432 return None 2433 2434 return self.expression( 2435 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2436 ) 2437 2438 def _parse_value(self) -> exp.Tuple: 2439 if self._match(TokenType.L_PAREN): 2440 expressions = self._parse_csv(self._parse_expression) 2441 self._match_r_paren() 2442 return self.expression(exp.Tuple, expressions=expressions) 2443 2444 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2445 return self.expression(exp.Tuple, expressions=[self._parse_expression()]) 2446 2447 def _parse_projections(self) -> t.List[exp.Expression]: 2448 return self._parse_expressions() 2449 2450 def _parse_select( 2451 self, 2452 nested: bool = False, 2453 table: bool = False, 2454 parse_subquery_alias: bool = True, 2455 parse_set_operation: bool = True, 2456 ) -> t.Optional[exp.Expression]: 2457 cte = self._parse_with() 2458 2459 if cte: 2460 this = self._parse_statement() 2461 2462 if not this: 2463 self.raise_error("Failed to parse any statement following CTE") 2464 return cte 2465 2466 if "with" in this.arg_types: 2467 this.set("with", cte) 2468 else: 2469 self.raise_error(f"{this.key} does not support CTE") 2470 this = cte 2471 2472 return this 2473 2474 # duckdb supports leading with FROM x 2475 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2476 2477 if self._match(TokenType.SELECT): 2478 comments = self._prev_comments 2479 2480 hint = self._parse_hint() 2481 all_ = self._match(TokenType.ALL) 2482 distinct = self._match_set(self.DISTINCT_TOKENS) 2483 2484 kind = ( 2485 self._match(TokenType.ALIAS) 2486 and self._match_texts(("STRUCT", "VALUE")) 2487 and self._prev.text.upper() 2488 ) 2489 2490 if distinct: 2491 distinct = self.expression( 2492 exp.Distinct, 2493 on=self._parse_value() if self._match(TokenType.ON) else None, 2494 ) 2495 2496 if all_ and distinct: 2497 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2498 2499 limit = self._parse_limit(top=True) 2500 projections = self._parse_projections() 2501 2502 this = self.expression( 2503 exp.Select, 2504 kind=kind, 2505 hint=hint, 2506 distinct=distinct, 2507 expressions=projections, 2508 limit=limit, 2509 ) 2510 this.comments = comments 2511 2512 into = self._parse_into() 2513 if into: 2514 this.set("into", into) 2515 2516 if not from_: 2517 from_ = self._parse_from() 2518 2519 if from_: 2520 this.set("from", from_) 2521 2522 this = self._parse_query_modifiers(this) 2523 elif (table or nested) and self._match(TokenType.L_PAREN): 2524 if self._match(TokenType.PIVOT): 2525 this = self._parse_simplified_pivot() 2526 elif self._match(TokenType.FROM): 2527 this = exp.select("*").from_( 2528 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2529 ) 2530 else: 2531 this = ( 2532 self._parse_table() 2533 if table 2534 else self._parse_select(nested=True, 
parse_set_operation=False) 2535 ) 2536 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2537 2538 self._match_r_paren() 2539 2540 # We return early here so that the UNION isn't attached to the subquery by the 2541 # following call to _parse_set_operations, but instead becomes the parent node 2542 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2543 elif self._match(TokenType.VALUES, advance=False): 2544 this = self._parse_derived_table_values() 2545 elif from_: 2546 this = exp.select("*").from_(from_.this, copy=False) 2547 else: 2548 this = None 2549 2550 if parse_set_operation: 2551 return self._parse_set_operations(this) 2552 return this 2553 2554 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2555 if not skip_with_token and not self._match(TokenType.WITH): 2556 return None 2557 2558 comments = self._prev_comments 2559 recursive = self._match(TokenType.RECURSIVE) 2560 2561 expressions = [] 2562 while True: 2563 expressions.append(self._parse_cte()) 2564 2565 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2566 break 2567 else: 2568 self._match(TokenType.WITH) 2569 2570 return self.expression( 2571 exp.With, comments=comments, expressions=expressions, recursive=recursive 2572 ) 2573 2574 def _parse_cte(self) -> exp.CTE: 2575 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2576 if not alias or not alias.this: 2577 self.raise_error("Expected CTE to have alias") 2578 2579 self._match(TokenType.ALIAS) 2580 2581 if self._match_text_seq("NOT", "MATERIALIZED"): 2582 materialized = False 2583 elif self._match_text_seq("MATERIALIZED"): 2584 materialized = True 2585 else: 2586 materialized = None 2587 2588 return self.expression( 2589 exp.CTE, 2590 this=self._parse_wrapped(self._parse_statement), 2591 alias=alias, 2592 materialized=materialized, 2593 ) 2594 2595 def _parse_table_alias( 2596 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2597 ) -> t.Optional[exp.TableAlias]: 2598 any_token = self._match(TokenType.ALIAS) 2599 alias = ( 2600 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2601 or self._parse_string_as_identifier() 2602 ) 2603 2604 index = self._index 2605 if self._match(TokenType.L_PAREN): 2606 columns = self._parse_csv(self._parse_function_parameter) 2607 self._match_r_paren() if columns else self._retreat(index) 2608 else: 2609 columns = None 2610 2611 if not alias and not columns: 2612 return None 2613 2614 return self.expression(exp.TableAlias, this=alias, columns=columns) 2615 2616 def _parse_subquery( 2617 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2618 ) -> t.Optional[exp.Subquery]: 2619 if not this: 2620 return None 2621 2622 return self.expression( 2623 exp.Subquery, 2624 this=this, 2625 pivots=self._parse_pivots(), 2626 alias=self._parse_table_alias() if parse_alias else None, 2627 ) 2628 2629 def _implicit_unnests_to_explicit(self, this: E) -> E: 2630 from sqlglot.optimizer.normalize_identifiers import ( 2631 normalize_identifiers as _norm, 2632 ) 2633 2634 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2635 for i, join in enumerate(this.args.get("joins") or []): 2636 table = join.this 2637 normalized_table = table.copy() 2638 normalized_table.meta["maybe_column"] = True 2639 normalized_table = _norm(normalized_table, dialect=self.dialect) 2640 2641 if isinstance(table, exp.Table) and not join.args.get("on"): 2642 if normalized_table.parts[0].name in refs: 2643 
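                    # `table` actually references a prior relation's column, e.g. the
                    # trailing z.a in SELECT 1 FROM y.z AS z, z.a, so convert it to UNNEST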
table_as_column = table.to_column() 2644 unnest = exp.Unnest(expressions=[table_as_column]) 2645 2646 # Table.to_column creates a parent Alias node that we want to convert to 2647 # a TableAlias and attach to the Unnest, so it matches the parser's output 2648 if isinstance(table.args.get("alias"), exp.TableAlias): 2649 table_as_column.replace(table_as_column.this) 2650 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2651 2652 table.replace(unnest) 2653 2654 refs.add(normalized_table.alias_or_name) 2655 2656 return this 2657 2658 def _parse_query_modifiers( 2659 self, this: t.Optional[exp.Expression] 2660 ) -> t.Optional[exp.Expression]: 2661 if isinstance(this, (exp.Query, exp.Table)): 2662 for join in iter(self._parse_join, None): 2663 this.append("joins", join) 2664 for lateral in iter(self._parse_lateral, None): 2665 this.append("laterals", lateral) 2666 2667 while True: 2668 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2669 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2670 key, expression = parser(self) 2671 2672 if expression: 2673 this.set(key, expression) 2674 if key == "limit": 2675 offset = expression.args.pop("offset", None) 2676 2677 if offset: 2678 offset = exp.Offset(expression=offset) 2679 this.set("offset", offset) 2680 2681 limit_by_expressions = expression.expressions 2682 expression.set("expressions", None) 2683 offset.set("expressions", limit_by_expressions) 2684 continue 2685 break 2686 2687 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2688 this = self._implicit_unnests_to_explicit(this) 2689 2690 return this 2691 2692 def _parse_hint(self) -> t.Optional[exp.Hint]: 2693 if self._match(TokenType.HINT): 2694 hints = [] 2695 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2696 hints.extend(hint) 2697 2698 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2699 self.raise_error("Expected */ after HINT") 2700 2701 return self.expression(exp.Hint, expressions=hints) 2702 2703 return None 2704 2705 def _parse_into(self) -> t.Optional[exp.Into]: 2706 if not self._match(TokenType.INTO): 2707 return None 2708 2709 temp = self._match(TokenType.TEMPORARY) 2710 unlogged = self._match_text_seq("UNLOGGED") 2711 self._match(TokenType.TABLE) 2712 2713 return self.expression( 2714 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2715 ) 2716 2717 def _parse_from( 2718 self, joins: bool = False, skip_from_token: bool = False 2719 ) -> t.Optional[exp.From]: 2720 if not skip_from_token and not self._match(TokenType.FROM): 2721 return None 2722 2723 return self.expression( 2724 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2725 ) 2726 2727 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2728 if not self._match(TokenType.MATCH_RECOGNIZE): 2729 return None 2730 2731 self._match_l_paren() 2732 2733 partition = self._parse_partition_by() 2734 order = self._parse_order() 2735 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2736 2737 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2738 rows = exp.var("ONE ROW PER MATCH") 2739 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2740 text = "ALL ROWS PER MATCH" 2741 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2742 text += " SHOW EMPTY MATCHES" 2743 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2744 text += " OMIT EMPTY MATCHES" 2745 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2746 text += 
" WITH UNMATCHED ROWS" 2747 rows = exp.var(text) 2748 else: 2749 rows = None 2750 2751 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2752 text = "AFTER MATCH SKIP" 2753 if self._match_text_seq("PAST", "LAST", "ROW"): 2754 text += " PAST LAST ROW" 2755 elif self._match_text_seq("TO", "NEXT", "ROW"): 2756 text += " TO NEXT ROW" 2757 elif self._match_text_seq("TO", "FIRST"): 2758 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2759 elif self._match_text_seq("TO", "LAST"): 2760 text += f" TO LAST {self._advance_any().text}" # type: ignore 2761 after = exp.var(text) 2762 else: 2763 after = None 2764 2765 if self._match_text_seq("PATTERN"): 2766 self._match_l_paren() 2767 2768 if not self._curr: 2769 self.raise_error("Expecting )", self._curr) 2770 2771 paren = 1 2772 start = self._curr 2773 2774 while self._curr and paren > 0: 2775 if self._curr.token_type == TokenType.L_PAREN: 2776 paren += 1 2777 if self._curr.token_type == TokenType.R_PAREN: 2778 paren -= 1 2779 2780 end = self._prev 2781 self._advance() 2782 2783 if paren > 0: 2784 self.raise_error("Expecting )", self._curr) 2785 2786 pattern = exp.var(self._find_sql(start, end)) 2787 else: 2788 pattern = None 2789 2790 define = ( 2791 self._parse_csv(self._parse_name_as_expression) 2792 if self._match_text_seq("DEFINE") 2793 else None 2794 ) 2795 2796 self._match_r_paren() 2797 2798 return self.expression( 2799 exp.MatchRecognize, 2800 partition_by=partition, 2801 order=order, 2802 measures=measures, 2803 rows=rows, 2804 after=after, 2805 pattern=pattern, 2806 define=define, 2807 alias=self._parse_table_alias(), 2808 ) 2809 2810 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2811 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2812 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2813 cross_apply = False 2814 2815 if cross_apply is not None: 2816 this = self._parse_select(table=True) 2817 view = None 2818 outer = None 2819 elif self._match(TokenType.LATERAL): 2820 this = self._parse_select(table=True) 2821 view = self._match(TokenType.VIEW) 2822 outer = self._match(TokenType.OUTER) 2823 else: 2824 return None 2825 2826 if not this: 2827 this = ( 2828 self._parse_unnest() 2829 or self._parse_function() 2830 or self._parse_id_var(any_token=False) 2831 ) 2832 2833 while self._match(TokenType.DOT): 2834 this = exp.Dot( 2835 this=this, 2836 expression=self._parse_function() or self._parse_id_var(any_token=False), 2837 ) 2838 2839 if view: 2840 table = self._parse_id_var(any_token=False) 2841 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2842 table_alias: t.Optional[exp.TableAlias] = self.expression( 2843 exp.TableAlias, this=table, columns=columns 2844 ) 2845 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2846 # We move the alias from the lateral's child node to the lateral itself 2847 table_alias = this.args["alias"].pop() 2848 else: 2849 table_alias = self._parse_table_alias() 2850 2851 return self.expression( 2852 exp.Lateral, 2853 this=this, 2854 view=view, 2855 outer=outer, 2856 alias=table_alias, 2857 cross_apply=cross_apply, 2858 ) 2859 2860 def _parse_join_parts( 2861 self, 2862 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2863 return ( 2864 self._match_set(self.JOIN_METHODS) and self._prev, 2865 self._match_set(self.JOIN_SIDES) and self._prev, 2866 self._match_set(self.JOIN_KINDS) and self._prev, 2867 ) 2868 2869 def _parse_join( 2870 self, skip_join_token: bool = False, 
parse_bracket: bool = False 2871 ) -> t.Optional[exp.Join]: 2872 if self._match(TokenType.COMMA): 2873 return self.expression(exp.Join, this=self._parse_table()) 2874 2875 index = self._index 2876 method, side, kind = self._parse_join_parts() 2877 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2878 join = self._match(TokenType.JOIN) 2879 2880 if not skip_join_token and not join: 2881 self._retreat(index) 2882 kind = None 2883 method = None 2884 side = None 2885 2886 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2887 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2888 2889 if not skip_join_token and not join and not outer_apply and not cross_apply: 2890 return None 2891 2892 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2893 2894 if method: 2895 kwargs["method"] = method.text 2896 if side: 2897 kwargs["side"] = side.text 2898 if kind: 2899 kwargs["kind"] = kind.text 2900 if hint: 2901 kwargs["hint"] = hint 2902 2903 if self._match(TokenType.ON): 2904 kwargs["on"] = self._parse_conjunction() 2905 elif self._match(TokenType.USING): 2906 kwargs["using"] = self._parse_wrapped_id_vars() 2907 elif not (kind and kind.token_type == TokenType.CROSS): 2908 index = self._index 2909 join = self._parse_join() 2910 2911 if join and self._match(TokenType.ON): 2912 kwargs["on"] = self._parse_conjunction() 2913 elif join and self._match(TokenType.USING): 2914 kwargs["using"] = self._parse_wrapped_id_vars() 2915 else: 2916 join = None 2917 self._retreat(index) 2918 2919 kwargs["this"].set("joins", [join] if join else None) 2920 2921 comments = [c for token in (method, side, kind) if token for c in token.comments] 2922 return self.expression(exp.Join, comments=comments, **kwargs) 2923 2924 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2925 this = self._parse_conjunction() 2926 2927 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2928 return this 2929 2930 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2931 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2932 2933 return this 2934 2935 def _parse_index_params(self) -> exp.IndexParameters: 2936 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2937 2938 if self._match(TokenType.L_PAREN, advance=False): 2939 columns = self._parse_wrapped_csv(self._parse_with_operator) 2940 else: 2941 columns = None 2942 2943 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 2944 partition_by = self._parse_partition_by() 2945 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 2946 tablespace = ( 2947 self._parse_var(any_token=True) 2948 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 2949 else None 2950 ) 2951 where = self._parse_where() 2952 2953 return self.expression( 2954 exp.IndexParameters, 2955 using=using, 2956 columns=columns, 2957 include=include, 2958 partition_by=partition_by, 2959 where=where, 2960 with_storage=with_storage, 2961 tablespace=tablespace, 2962 ) 2963 2964 def _parse_index( 2965 self, 2966 index: t.Optional[exp.Expression] = None, 2967 ) -> t.Optional[exp.Index]: 2968 if index: 2969 unique = None 2970 primary = None 2971 amp = None 2972 2973 self._match(TokenType.ON) 2974 self._match(TokenType.TABLE) # hive 2975 table = self._parse_table_parts(schema=True) 2976 else: 2977 unique = self._match(TokenType.UNIQUE) 2978 primary = self._match_text_seq("PRIMARY") 
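# the "AMP" modifier matched just below is Teradata-specific; it is carried through to exp.Index as the amp arg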
2979 amp = self._match_text_seq("AMP") 2980 2981 if not self._match(TokenType.INDEX): 2982 return None 2983 2984 index = self._parse_id_var() 2985 table = None 2986 2987 params = self._parse_index_params() 2988 2989 return self.expression( 2990 exp.Index, 2991 this=index, 2992 table=table, 2993 unique=unique, 2994 primary=primary, 2995 amp=amp, 2996 params=params, 2997 ) 2998 2999 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3000 hints: t.List[exp.Expression] = [] 3001 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3002 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3003 hints.append( 3004 self.expression( 3005 exp.WithTableHint, 3006 expressions=self._parse_csv( 3007 lambda: self._parse_function() or self._parse_var(any_token=True) 3008 ), 3009 ) 3010 ) 3011 self._match_r_paren() 3012 else: 3013 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3014 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3015 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3016 3017 self._match_texts(("INDEX", "KEY")) 3018 if self._match(TokenType.FOR): 3019 hint.set("target", self._advance_any() and self._prev.text.upper()) 3020 3021 hint.set("expressions", self._parse_wrapped_id_vars()) 3022 hints.append(hint) 3023 3024 return hints or None 3025 3026 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3027 return ( 3028 (not schema and self._parse_function(optional_parens=False)) 3029 or self._parse_id_var(any_token=False) 3030 or self._parse_string_as_identifier() 3031 or self._parse_placeholder() 3032 ) 3033 3034 def _parse_table_parts( 3035 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3036 ) -> exp.Table: 3037 catalog = None 3038 db = None 3039 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3040 3041 while self._match(TokenType.DOT): 3042 if catalog: 3043 # This allows nesting the table in arbitrarily many dot expressions if needed 3044 table = self.expression( 3045 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3046 ) 3047 else: 3048 catalog = db 3049 db = table 3050 # "" used for tsql FROM a..b case 3051 table = self._parse_table_part(schema=schema) or "" 3052 3053 if ( 3054 wildcard 3055 and self._is_connected() 3056 and (isinstance(table, exp.Identifier) or not table) 3057 and self._match(TokenType.STAR) 3058 ): 3059 if isinstance(table, exp.Identifier): 3060 table.args["this"] += "*" 3061 else: 3062 table = exp.Identifier(this="*") 3063 3064 if is_db_reference: 3065 catalog = db 3066 db = table 3067 table = None 3068 3069 if not table and not is_db_reference: 3070 self.raise_error(f"Expected table name but got {self._curr}") 3071 if not db and is_db_reference: 3072 self.raise_error(f"Expected database name but got {self._curr}") 3073 3074 return self.expression( 3075 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 3076 ) 3077 3078 def _parse_table( 3079 self, 3080 schema: bool = False, 3081 joins: bool = False, 3082 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3083 parse_bracket: bool = False, 3084 is_db_reference: bool = False, 3085 ) -> t.Optional[exp.Expression]: 3086 lateral = self._parse_lateral() 3087 if lateral: 3088 return lateral 3089 3090 unnest = self._parse_unnest() 3091 if unnest: 3092 return unnest 3093 3094 values = self._parse_derived_table_values() 3095 if values: 3096 return values 3097 3098 subquery = 
self._parse_select(table=True) 3099 if subquery: 3100 if not subquery.args.get("pivots"): 3101 subquery.set("pivots", self._parse_pivots()) 3102 return subquery 3103 3104 bracket = parse_bracket and self._parse_bracket(None) 3105 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3106 3107 only = self._match(TokenType.ONLY) 3108 3109 this = t.cast( 3110 exp.Expression, 3111 bracket 3112 or self._parse_bracket( 3113 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3114 ), 3115 ) 3116 3117 if only: 3118 this.set("only", only) 3119 3120 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3121 self._match_text_seq("*") 3122 3123 if schema: 3124 return self._parse_schema(this=this) 3125 3126 version = self._parse_version() 3127 3128 if version: 3129 this.set("version", version) 3130 3131 if self.dialect.ALIAS_POST_TABLESAMPLE: 3132 table_sample = self._parse_table_sample() 3133 3134 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3135 if alias: 3136 this.set("alias", alias) 3137 3138 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3139 return self.expression( 3140 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3141 ) 3142 3143 this.set("hints", self._parse_table_hints()) 3144 3145 if not this.args.get("pivots"): 3146 this.set("pivots", self._parse_pivots()) 3147 3148 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3149 table_sample = self._parse_table_sample() 3150 3151 if table_sample: 3152 table_sample.set("this", this) 3153 this = table_sample 3154 3155 if joins: 3156 for join in iter(self._parse_join, None): 3157 this.append("joins", join) 3158 3159 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3160 this.set("ordinality", True) 3161 this.set("alias", self._parse_table_alias()) 3162 3163 return this 3164 3165 def _parse_version(self) -> t.Optional[exp.Version]: 3166 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3167 this = "TIMESTAMP" 3168 elif self._match(TokenType.VERSION_SNAPSHOT): 3169 this = "VERSION" 3170 else: 3171 return None 3172 3173 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3174 kind = self._prev.text.upper() 3175 start = self._parse_bitwise() 3176 self._match_texts(("TO", "AND")) 3177 end = self._parse_bitwise() 3178 expression: t.Optional[exp.Expression] = self.expression( 3179 exp.Tuple, expressions=[start, end] 3180 ) 3181 elif self._match_text_seq("CONTAINED", "IN"): 3182 kind = "CONTAINED IN" 3183 expression = self.expression( 3184 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3185 ) 3186 elif self._match(TokenType.ALL): 3187 kind = "ALL" 3188 expression = None 3189 else: 3190 self._match_text_seq("AS", "OF") 3191 kind = "AS OF" 3192 expression = self._parse_type() 3193 3194 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3195 3196 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3197 if not self._match(TokenType.UNNEST): 3198 return None 3199 3200 expressions = self._parse_wrapped_csv(self._parse_equality) 3201 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3202 3203 alias = self._parse_table_alias() if with_alias else None 3204 3205 if alias: 3206 if self.dialect.UNNEST_COLUMN_ONLY: 3207 if alias.args.get("columns"): 3208 self.raise_error("Unexpected extra column alias in unnest.") 3209 3210 alias.set("columns", [alias.this]) 3211 alias.set("this", None) 3212 3213 columns = 
alias.args.get("columns") or [] 3214 if offset and len(expressions) < len(columns): 3215 offset = columns.pop() 3216 3217 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3218 self._match(TokenType.ALIAS) 3219 offset = self._parse_id_var( 3220 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3221 ) or exp.to_identifier("offset") 3222 3223 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3224 3225 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3226 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3227 if not is_derived and not self._match_text_seq("VALUES"): 3228 return None 3229 3230 expressions = self._parse_csv(self._parse_value) 3231 alias = self._parse_table_alias() 3232 3233 if is_derived: 3234 self._match_r_paren() 3235 3236 return self.expression( 3237 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3238 ) 3239 3240 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3241 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3242 as_modifier and self._match_text_seq("USING", "SAMPLE") 3243 ): 3244 return None 3245 3246 bucket_numerator = None 3247 bucket_denominator = None 3248 bucket_field = None 3249 percent = None 3250 size = None 3251 seed = None 3252 3253 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3254 matched_l_paren = self._match(TokenType.L_PAREN) 3255 3256 if self.TABLESAMPLE_CSV: 3257 num = None 3258 expressions = self._parse_csv(self._parse_primary) 3259 else: 3260 expressions = None 3261 num = ( 3262 self._parse_factor() 3263 if self._match(TokenType.NUMBER, advance=False) 3264 else self._parse_primary() or self._parse_placeholder() 3265 ) 3266 3267 if self._match_text_seq("BUCKET"): 3268 bucket_numerator = self._parse_number() 3269 self._match_text_seq("OUT", "OF") 3270 bucket_denominator = bucket_denominator = self._parse_number() 3271 self._match(TokenType.ON) 3272 bucket_field = self._parse_field() 3273 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3274 percent = num 3275 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3276 size = num 3277 else: 3278 percent = num 3279 3280 if matched_l_paren: 3281 self._match_r_paren() 3282 3283 if self._match(TokenType.L_PAREN): 3284 method = self._parse_var(upper=True) 3285 seed = self._match(TokenType.COMMA) and self._parse_number() 3286 self._match_r_paren() 3287 elif self._match_texts(("SEED", "REPEATABLE")): 3288 seed = self._parse_wrapped(self._parse_number) 3289 3290 return self.expression( 3291 exp.TableSample, 3292 expressions=expressions, 3293 method=method, 3294 bucket_numerator=bucket_numerator, 3295 bucket_denominator=bucket_denominator, 3296 bucket_field=bucket_field, 3297 percent=percent, 3298 size=size, 3299 seed=seed, 3300 ) 3301 3302 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3303 return list(iter(self._parse_pivot, None)) or None 3304 3305 # https://duckdb.org/docs/sql/statements/pivot 3306 def _parse_simplified_pivot(self) -> exp.Pivot: 3307 def _parse_on() -> t.Optional[exp.Expression]: 3308 this = self._parse_bitwise() 3309 return self._parse_in(this) if self._match(TokenType.IN) else this 3310 3311 this = self._parse_table() 3312 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3313 using = self._match(TokenType.USING) and self._parse_csv( 3314 lambda: self._parse_alias(self._parse_function()) 3315 ) 3316 group = self._parse_group() 3317 
return self.expression( 3318 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3319 ) 3320 3321 def _parse_pivot_in(self) -> exp.In: 3322 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3323 this = self._parse_conjunction() 3324 3325 self._match(TokenType.ALIAS) 3326 alias = self._parse_field() 3327 if alias: 3328 return self.expression(exp.PivotAlias, this=this, alias=alias) 3329 3330 return this 3331 3332 value = self._parse_column() 3333 3334 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3335 self.raise_error("Expecting IN (") 3336 3337 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3338 3339 self._match_r_paren() 3340 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3341 3342 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3343 index = self._index 3344 include_nulls = None 3345 3346 if self._match(TokenType.PIVOT): 3347 unpivot = False 3348 elif self._match(TokenType.UNPIVOT): 3349 unpivot = True 3350 3351 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3352 if self._match_text_seq("INCLUDE", "NULLS"): 3353 include_nulls = True 3354 elif self._match_text_seq("EXCLUDE", "NULLS"): 3355 include_nulls = False 3356 else: 3357 return None 3358 3359 expressions = [] 3360 3361 if not self._match(TokenType.L_PAREN): 3362 self._retreat(index) 3363 return None 3364 3365 if unpivot: 3366 expressions = self._parse_csv(self._parse_column) 3367 else: 3368 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3369 3370 if not expressions: 3371 self.raise_error("Failed to parse PIVOT's aggregation list") 3372 3373 if not self._match(TokenType.FOR): 3374 self.raise_error("Expecting FOR") 3375 3376 field = self._parse_pivot_in() 3377 3378 self._match_r_paren() 3379 3380 pivot = self.expression( 3381 exp.Pivot, 3382 expressions=expressions, 3383 field=field, 3384 unpivot=unpivot, 3385 include_nulls=include_nulls, 3386 ) 3387 3388 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3389 pivot.set("alias", self._parse_table_alias()) 3390 3391 if not unpivot: 3392 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3393 3394 columns: t.List[exp.Expression] = [] 3395 for fld in pivot.args["field"].expressions: 3396 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3397 for name in names: 3398 if self.PREFIXED_PIVOT_COLUMNS: 3399 name = f"{name}_{field_name}" if name else field_name 3400 else: 3401 name = f"{field_name}_{name}" if name else field_name 3402 3403 columns.append(exp.to_identifier(name)) 3404 3405 pivot.set("columns", columns) 3406 3407 return pivot 3408 3409 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3410 return [agg.alias for agg in aggregations] 3411 3412 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3413 if not skip_where_token and not self._match(TokenType.PREWHERE): 3414 return None 3415 3416 return self.expression( 3417 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3418 ) 3419 3420 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3421 if not skip_where_token and not self._match(TokenType.WHERE): 3422 return None 3423 3424 return self.expression( 3425 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3426 ) 3427 3428 def _parse_group(self, skip_group_by_token: bool = False) -> 
t.Optional[exp.Group]: 3429 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3430 return None 3431 3432 elements = defaultdict(list) 3433 3434 if self._match(TokenType.ALL): 3435 return self.expression(exp.Group, all=True) 3436 3437 while True: 3438 expressions = self._parse_csv(self._parse_conjunction) 3439 if expressions: 3440 elements["expressions"].extend(expressions) 3441 3442 grouping_sets = self._parse_grouping_sets() 3443 if grouping_sets: 3444 elements["grouping_sets"].extend(grouping_sets) 3445 3446 rollup = None 3447 cube = None 3448 totals = None 3449 3450 index = self._index 3451 with_ = self._match(TokenType.WITH) 3452 if self._match(TokenType.ROLLUP): 3453 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3454 elements["rollup"].extend(ensure_list(rollup)) 3455 3456 if self._match(TokenType.CUBE): 3457 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3458 elements["cube"].extend(ensure_list(cube)) 3459 3460 if self._match_text_seq("TOTALS"): 3461 totals = True 3462 elements["totals"] = True # type: ignore 3463 3464 if not (grouping_sets or rollup or cube or totals): 3465 if with_: 3466 self._retreat(index) 3467 break 3468 3469 return self.expression(exp.Group, **elements) # type: ignore 3470 3471 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3472 if not self._match(TokenType.GROUPING_SETS): 3473 return None 3474 3475 return self._parse_wrapped_csv(self._parse_grouping_set) 3476 3477 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3478 if self._match(TokenType.L_PAREN): 3479 grouping_set = self._parse_csv(self._parse_column) 3480 self._match_r_paren() 3481 return self.expression(exp.Tuple, expressions=grouping_set) 3482 3483 return self._parse_column() 3484 3485 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3486 if not skip_having_token and not self._match(TokenType.HAVING): 3487 return None 3488 return self.expression(exp.Having, this=self._parse_conjunction()) 3489 3490 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3491 if not self._match(TokenType.QUALIFY): 3492 return None 3493 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3494 3495 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3496 if skip_start_token: 3497 start = None 3498 elif self._match(TokenType.START_WITH): 3499 start = self._parse_conjunction() 3500 else: 3501 return None 3502 3503 self._match(TokenType.CONNECT_BY) 3504 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3505 exp.Prior, this=self._parse_bitwise() 3506 ) 3507 connect = self._parse_conjunction() 3508 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3509 3510 if not start and self._match(TokenType.START_WITH): 3511 start = self._parse_conjunction() 3512 3513 return self.expression(exp.Connect, start=start, connect=connect) 3514 3515 def _parse_name_as_expression(self) -> exp.Alias: 3516 return self.expression( 3517 exp.Alias, 3518 alias=self._parse_id_var(any_token=True), 3519 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3520 ) 3521 3522 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3523 if self._match_text_seq("INTERPOLATE"): 3524 return self._parse_wrapped_csv(self._parse_name_as_expression) 3525 return None 3526 3527 def _parse_order( 3528 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3529 ) -> t.Optional[exp.Expression]: 3530 siblings = None 3531 if not skip_order_token and 
not self._match(TokenType.ORDER_BY): 3532 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3533 return this 3534 3535 siblings = True 3536 3537 return self.expression( 3538 exp.Order, 3539 this=this, 3540 expressions=self._parse_csv(self._parse_ordered), 3541 interpolate=self._parse_interpolate(), 3542 siblings=siblings, 3543 ) 3544 3545 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3546 if not self._match(token): 3547 return None 3548 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3549 3550 def _parse_ordered( 3551 self, parse_method: t.Optional[t.Callable] = None 3552 ) -> t.Optional[exp.Ordered]: 3553 this = parse_method() if parse_method else self._parse_conjunction() 3554 if not this: 3555 return None 3556 3557 asc = self._match(TokenType.ASC) 3558 desc = self._match(TokenType.DESC) or (asc and False) 3559 3560 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3561 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3562 3563 nulls_first = is_nulls_first or False 3564 explicitly_null_ordered = is_nulls_first or is_nulls_last 3565 3566 if ( 3567 not explicitly_null_ordered 3568 and ( 3569 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3570 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3571 ) 3572 and self.dialect.NULL_ORDERING != "nulls_are_last" 3573 ): 3574 nulls_first = True 3575 3576 if self._match_text_seq("WITH", "FILL"): 3577 with_fill = self.expression( 3578 exp.WithFill, 3579 **{ # type: ignore 3580 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3581 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3582 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3583 }, 3584 ) 3585 else: 3586 with_fill = None 3587 3588 return self.expression( 3589 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3590 ) 3591 3592 def _parse_limit( 3593 self, 3594 this: t.Optional[exp.Expression] = None, 3595 top: bool = False, 3596 skip_limit_token: bool = False, 3597 ) -> t.Optional[exp.Expression]: 3598 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3599 comments = self._prev_comments 3600 if top: 3601 limit_paren = self._match(TokenType.L_PAREN) 3602 expression = self._parse_term() if limit_paren else self._parse_number() 3603 3604 if limit_paren: 3605 self._match_r_paren() 3606 else: 3607 expression = self._parse_term() 3608 3609 if self._match(TokenType.COMMA): 3610 offset = expression 3611 expression = self._parse_term() 3612 else: 3613 offset = None 3614 3615 limit_exp = self.expression( 3616 exp.Limit, 3617 this=this, 3618 expression=expression, 3619 offset=offset, 3620 comments=comments, 3621 expressions=self._parse_limit_by(), 3622 ) 3623 3624 return limit_exp 3625 3626 if self._match(TokenType.FETCH): 3627 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3628 direction = self._prev.text.upper() if direction else "FIRST" 3629 3630 count = self._parse_field(tokens=self.FETCH_TOKENS) 3631 percent = self._match(TokenType.PERCENT) 3632 3633 self._match_set((TokenType.ROW, TokenType.ROWS)) 3634 3635 only = self._match_text_seq("ONLY") 3636 with_ties = self._match_text_seq("WITH", "TIES") 3637 3638 if only and with_ties: 3639 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3640 3641 return self.expression( 3642 exp.Fetch, 3643 direction=direction, 3644 count=count, 3645 percent=percent, 3646 with_ties=with_ties, 3647 ) 3648 3649 return this 3650 
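# A minimal usage sketch, assuming only the public sqlglot.transpile API: the
# COMMA branch of _parse_limit above captures MySQL's "LIMIT <offset>, <count>"
# form, and _parse_query_modifiers later splits the offset into its own
# exp.Offset node, so one would expect
#
#   import sqlglot
#   sqlglot.transpile("SELECT x FROM t LIMIT 5, 10", read="mysql", write="postgres")
#
# to return ["SELECT x FROM t LIMIT 10 OFFSET 5"].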
3651 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3652 if not self._match(TokenType.OFFSET): 3653 return this 3654 3655 count = self._parse_term() 3656 self._match_set((TokenType.ROW, TokenType.ROWS)) 3657 3658 return self.expression( 3659 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3660 ) 3661 3662 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3663 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3664 3665 def _parse_locks(self) -> t.List[exp.Lock]: 3666 locks = [] 3667 while True: 3668 if self._match_text_seq("FOR", "UPDATE"): 3669 update = True 3670 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3671 "LOCK", "IN", "SHARE", "MODE" 3672 ): 3673 update = False 3674 else: 3675 break 3676 3677 expressions = None 3678 if self._match_text_seq("OF"): 3679 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3680 3681 wait: t.Optional[bool | exp.Expression] = None 3682 if self._match_text_seq("NOWAIT"): 3683 wait = True 3684 elif self._match_text_seq("WAIT"): 3685 wait = self._parse_primary() 3686 elif self._match_text_seq("SKIP", "LOCKED"): 3687 wait = False 3688 3689 locks.append( 3690 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3691 ) 3692 3693 return locks 3694 3695 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3696 while this and self._match_set(self.SET_OPERATIONS): 3697 token_type = self._prev.token_type 3698 3699 if token_type == TokenType.UNION: 3700 operation = exp.Union 3701 elif token_type == TokenType.EXCEPT: 3702 operation = exp.Except 3703 else: 3704 operation = exp.Intersect 3705 3706 comments = self._prev.comments 3707 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3708 by_name = self._match_text_seq("BY", "NAME") 3709 expression = self._parse_select(nested=True, parse_set_operation=False) 3710 3711 this = self.expression( 3712 operation, 3713 comments=comments, 3714 this=this, 3715 distinct=distinct, 3716 by_name=by_name, 3717 expression=expression, 3718 ) 3719 3720 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3721 expression = this.expression 3722 3723 if expression: 3724 for arg in self.UNION_MODIFIERS: 3725 expr = expression.args.get(arg) 3726 if expr: 3727 this.set(arg, expr.pop()) 3728 3729 return this 3730 3731 def _parse_expression(self) -> t.Optional[exp.Expression]: 3732 return self._parse_alias(self._parse_conjunction()) 3733 3734 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3735 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3736 3737 def _parse_equality(self) -> t.Optional[exp.Expression]: 3738 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3739 3740 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3741 return self._parse_tokens(self._parse_range, self.COMPARISON) 3742 3743 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3744 this = this or self._parse_bitwise() 3745 negate = self._match(TokenType.NOT) 3746 3747 if self._match_set(self.RANGE_PARSERS): 3748 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3749 if not expression: 3750 return this 3751 3752 this = expression 3753 elif self._match(TokenType.ISNULL): 3754 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3755 3756 # Postgres supports ISNULL and NOTNULL for conditions. 
3757 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3758 if self._match(TokenType.NOTNULL): 3759 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3760 this = self.expression(exp.Not, this=this) 3761 3762 if negate: 3763 this = self.expression(exp.Not, this=this) 3764 3765 if self._match(TokenType.IS): 3766 this = self._parse_is(this) 3767 3768 return this 3769 3770 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3771 index = self._index - 1 3772 negate = self._match(TokenType.NOT) 3773 3774 if self._match_text_seq("DISTINCT", "FROM"): 3775 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3776 return self.expression(klass, this=this, expression=self._parse_bitwise()) 3777 3778 expression = self._parse_null() or self._parse_boolean() 3779 if not expression: 3780 self._retreat(index) 3781 return None 3782 3783 this = self.expression(exp.Is, this=this, expression=expression) 3784 return self.expression(exp.Not, this=this) if negate else this 3785 3786 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3787 unnest = self._parse_unnest(with_alias=False) 3788 if unnest: 3789 this = self.expression(exp.In, this=this, unnest=unnest) 3790 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3791 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3792 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3793 3794 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 3795 this = self.expression(exp.In, this=this, query=expressions[0]) 3796 else: 3797 this = self.expression(exp.In, this=this, expressions=expressions) 3798 3799 if matched_l_paren: 3800 self._match_r_paren(this) 3801 elif not self._match(TokenType.R_BRACKET, expression=this): 3802 self.raise_error("Expecting ]") 3803 else: 3804 this = self.expression(exp.In, this=this, field=self._parse_field()) 3805 3806 return this 3807 3808 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3809 low = self._parse_bitwise() 3810 self._match(TokenType.AND) 3811 high = self._parse_bitwise() 3812 return self.expression(exp.Between, this=this, low=low, high=high) 3813 3814 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3815 if not self._match(TokenType.ESCAPE): 3816 return this 3817 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3818 3819 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3820 index = self._index 3821 3822 if not self._match(TokenType.INTERVAL) and match_interval: 3823 return None 3824 3825 if self._match(TokenType.STRING, advance=False): 3826 this = self._parse_primary() 3827 else: 3828 this = self._parse_term() 3829 3830 if not this or ( 3831 isinstance(this, exp.Column) 3832 and not this.table 3833 and not this.this.quoted 3834 and this.name.upper() == "IS" 3835 ): 3836 self._retreat(index) 3837 return None 3838 3839 unit = self._parse_function() or ( 3840 not self._match(TokenType.ALIAS, advance=False) 3841 and self._parse_var(any_token=True, upper=True) 3842 ) 3843 3844 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3845 # each INTERVAL expression into this canonical form so it's easy to transpile 3846 if this and this.is_number: 3847 this = exp.Literal.string(this.name) 3848 elif this and this.is_string: 3849 parts = this.name.split() 3850 3851 if len(parts) == 2: 3852 if unit: 3853 # This is not 
actually a unit, it's something else (e.g. a "window side") 3854 unit = None 3855 self._retreat(self._index - 1) 3856 3857 this = exp.Literal.string(parts[0]) 3858 unit = self.expression(exp.Var, this=parts[1].upper()) 3859 3860 return self.expression(exp.Interval, this=this, unit=unit) 3861 3862 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3863 this = self._parse_term() 3864 3865 while True: 3866 if self._match_set(self.BITWISE): 3867 this = self.expression( 3868 self.BITWISE[self._prev.token_type], 3869 this=this, 3870 expression=self._parse_term(), 3871 ) 3872 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3873 this = self.expression( 3874 exp.DPipe, 3875 this=this, 3876 expression=self._parse_term(), 3877 safe=not self.dialect.STRICT_STRING_CONCAT, 3878 ) 3879 elif self._match(TokenType.DQMARK): 3880 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3881 elif self._match_pair(TokenType.LT, TokenType.LT): 3882 this = self.expression( 3883 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3884 ) 3885 elif self._match_pair(TokenType.GT, TokenType.GT): 3886 this = self.expression( 3887 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3888 ) 3889 else: 3890 break 3891 3892 return this 3893 3894 def _parse_term(self) -> t.Optional[exp.Expression]: 3895 return self._parse_tokens(self._parse_factor, self.TERM) 3896 3897 def _parse_factor(self) -> t.Optional[exp.Expression]: 3898 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3899 this = parse_method() 3900 3901 while self._match_set(self.FACTOR): 3902 this = self.expression( 3903 self.FACTOR[self._prev.token_type], 3904 this=this, 3905 comments=self._prev_comments, 3906 expression=parse_method(), 3907 ) 3908 if isinstance(this, exp.Div): 3909 this.args["typed"] = self.dialect.TYPED_DIVISION 3910 this.args["safe"] = self.dialect.SAFE_DIVISION 3911 3912 return this 3913 3914 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3915 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3916 3917 def _parse_unary(self) -> t.Optional[exp.Expression]: 3918 if self._match_set(self.UNARY_PARSERS): 3919 return self.UNARY_PARSERS[self._prev.token_type](self) 3920 return self._parse_at_time_zone(self._parse_type()) 3921 3922 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3923 interval = parse_interval and self._parse_interval() 3924 if interval: 3925 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 3926 while True: 3927 index = self._index 3928 self._match(TokenType.PLUS) 3929 3930 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 3931 self._retreat(index) 3932 break 3933 3934 interval = self.expression( # type: ignore 3935 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 3936 ) 3937 3938 return interval 3939 3940 index = self._index 3941 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3942 this = self._parse_column() 3943 3944 if data_type: 3945 if isinstance(this, exp.Literal): 3946 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3947 if parser: 3948 return parser(self, this, data_type) 3949 return self.expression(exp.Cast, this=this, to=data_type) 3950 if not data_type.expressions: 3951 self._retreat(index) 3952 return self._parse_column() 3953 return self._parse_column_ops(data_type) 3954 3955 return this and self._parse_column_ops(this) 3956 3957 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3958 this = self._parse_type() 3959 if not this: 3960 return None 3961 3962 if isinstance(this, exp.Column) and not this.table: 3963 this = exp.var(this.name.upper()) 3964 3965 return self.expression( 3966 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3967 ) 3968 3969 def _parse_types( 3970 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3971 ) -> t.Optional[exp.Expression]: 3972 index = self._index 3973 3974 prefix = self._match_text_seq("SYSUDTLIB", ".") 3975 3976 if not self._match_set(self.TYPE_TOKENS): 3977 identifier = allow_identifiers and self._parse_id_var( 3978 any_token=False, tokens=(TokenType.VAR,) 3979 ) 3980 if identifier: 3981 tokens = self.dialect.tokenize(identifier.name) 3982 3983 if len(tokens) != 1: 3984 self.raise_error("Unexpected identifier", self._prev) 3985 3986 if tokens[0].token_type in self.TYPE_TOKENS: 3987 self._prev = tokens[0] 3988 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 3989 type_name = identifier.name 3990 3991 while self._match(TokenType.DOT): 3992 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3993 3994 return exp.DataType.build(type_name, udt=True) 3995 else: 3996 self._retreat(self._index - 1) 3997 return None 3998 else: 3999 return None 4000 4001 type_token = self._prev.token_type 4002 4003 if type_token == TokenType.PSEUDO_TYPE: 4004 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4005 4006 if type_token == TokenType.OBJECT_IDENTIFIER: 4007 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4008 4009 nested = type_token in self.NESTED_TYPE_TOKENS 4010 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4011 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4012 expressions = None 4013 maybe_func = False 4014 4015 if self._match(TokenType.L_PAREN): 4016 if is_struct: 4017 expressions = self._parse_csv(self._parse_struct_types) 4018 elif nested: 4019 expressions = self._parse_csv( 4020 lambda: self._parse_types( 4021 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4022 ) 4023 ) 4024 elif type_token in self.ENUM_TYPE_TOKENS: 4025 expressions = self._parse_csv(self._parse_equality) 4026 elif is_aggregate: 4027 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4028 any_token=False, tokens=(TokenType.VAR,) 4029 ) 4030 if not func_or_ident or not self._match(TokenType.COMMA): 4031 return None 4032 expressions = 
self._parse_csv( 4033 lambda: self._parse_types( 4034 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4035 ) 4036 ) 4037 expressions.insert(0, func_or_ident) 4038 else: 4039 expressions = self._parse_csv(self._parse_type_size) 4040 4041 if not expressions or not self._match(TokenType.R_PAREN): 4042 self._retreat(index) 4043 return None 4044 4045 maybe_func = True 4046 4047 this: t.Optional[exp.Expression] = None 4048 values: t.Optional[t.List[exp.Expression]] = None 4049 4050 if nested and self._match(TokenType.LT): 4051 if is_struct: 4052 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4053 else: 4054 expressions = self._parse_csv( 4055 lambda: self._parse_types( 4056 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4057 ) 4058 ) 4059 4060 if not self._match(TokenType.GT): 4061 self.raise_error("Expecting >") 4062 4063 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4064 values = self._parse_csv(self._parse_conjunction) 4065 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4066 4067 if type_token in self.TIMESTAMPS: 4068 if self._match_text_seq("WITH", "TIME", "ZONE"): 4069 maybe_func = False 4070 tz_type = ( 4071 exp.DataType.Type.TIMETZ 4072 if type_token in self.TIMES 4073 else exp.DataType.Type.TIMESTAMPTZ 4074 ) 4075 this = exp.DataType(this=tz_type, expressions=expressions) 4076 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4077 maybe_func = False 4078 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4079 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4080 maybe_func = False 4081 elif type_token == TokenType.INTERVAL: 4082 unit = self._parse_var() 4083 4084 if self._match_text_seq("TO"): 4085 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 4086 else: 4087 span = None 4088 4089 if span or not unit: 4090 this = self.expression( 4091 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 4092 ) 4093 else: 4094 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4095 4096 if maybe_func and check_func: 4097 index2 = self._index 4098 peek = self._parse_string() 4099 4100 if not peek: 4101 self._retreat(index) 4102 return None 4103 4104 self._retreat(index2) 4105 4106 if not this: 4107 if self._match_text_seq("UNSIGNED"): 4108 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4109 if not unsigned_type_token: 4110 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4111 4112 type_token = unsigned_type_token or type_token 4113 4114 this = exp.DataType( 4115 this=exp.DataType.Type[type_token.value], 4116 expressions=expressions, 4117 nested=nested, 4118 values=values, 4119 prefix=prefix, 4120 ) 4121 4122 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 4123 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 4124 4125 return this 4126 4127 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4128 index = self._index 4129 this = self._parse_type(parse_interval=False) or self._parse_id_var() 4130 self._match(TokenType.COLON) 4131 column_def = self._parse_column_def(this) 4132 4133 if type_required and ( 4134 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 4135 ): 4136 self._retreat(index) 4137 return self._parse_types() 4138 4139 return column_def 4140 4141 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) 
-> t.Optional[exp.Expression]: 4142 if not self._match_text_seq("AT", "TIME", "ZONE"): 4143 return this 4144 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4145 4146 def _parse_column(self) -> t.Optional[exp.Expression]: 4147 this = self._parse_column_reference() 4148 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4149 4150 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4151 this = self._parse_field() 4152 if ( 4153 not this 4154 and self._match(TokenType.VALUES, advance=False) 4155 and self.VALUES_FOLLOWED_BY_PAREN 4156 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4157 ): 4158 this = self._parse_id_var() 4159 4160 return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this 4161 4162 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4163 this = self._parse_bracket(this) 4164 4165 while self._match_set(self.COLUMN_OPERATORS): 4166 op_token = self._prev.token_type 4167 op = self.COLUMN_OPERATORS.get(op_token) 4168 4169 if op_token == TokenType.DCOLON: 4170 field = self._parse_types() 4171 if not field: 4172 self.raise_error("Expected type") 4173 elif op and self._curr: 4174 field = self._parse_column_reference() 4175 else: 4176 field = self._parse_field(anonymous_func=True, any_token=True) 4177 4178 if isinstance(field, exp.Func) and this: 4179 # bigquery allows function calls like x.y.count(...) 4180 # SAFE.SUBSTR(...) 4181 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4182 this = exp.replace_tree( 4183 this, 4184 lambda n: ( 4185 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4186 if n.table 4187 else n.this 4188 ) 4189 if isinstance(n, exp.Column) 4190 else n, 4191 ) 4192 4193 if op: 4194 this = op(self, this, field) 4195 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4196 this = self.expression( 4197 exp.Column, 4198 this=field, 4199 table=this.this, 4200 db=this.args.get("table"), 4201 catalog=this.args.get("db"), 4202 ) 4203 else: 4204 this = self.expression(exp.Dot, this=this, expression=field) 4205 this = self._parse_bracket(this) 4206 return this 4207 4208 def _parse_primary(self) -> t.Optional[exp.Expression]: 4209 if self._match_set(self.PRIMARY_PARSERS): 4210 token_type = self._prev.token_type 4211 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4212 4213 if token_type == TokenType.STRING: 4214 expressions = [primary] 4215 while self._match(TokenType.STRING): 4216 expressions.append(exp.Literal.string(self._prev.text)) 4217 4218 if len(expressions) > 1: 4219 return self.expression(exp.Concat, expressions=expressions) 4220 4221 return primary 4222 4223 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4224 return exp.Literal.number(f"0.{self._prev.text}") 4225 4226 if self._match(TokenType.L_PAREN): 4227 comments = self._prev_comments 4228 query = self._parse_select() 4229 4230 if query: 4231 expressions = [query] 4232 else: 4233 expressions = self._parse_expressions() 4234 4235 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4236 4237 if isinstance(this, exp.UNWRAPPED_QUERIES): 4238 this = self._parse_set_operations( 4239 self._parse_subquery(this=this, parse_alias=False) 4240 ) 4241 elif isinstance(this, exp.Subquery): 4242 this = self._parse_subquery( 4243 this=self._parse_set_operations(this), parse_alias=False 4244 ) 4245 elif len(expressions) > 1: 4246 this = 
self.expression(exp.Tuple, expressions=expressions) 4247 else: 4248 this = self.expression(exp.Paren, this=this) 4249 4250 if this: 4251 this.add_comments(comments) 4252 4253 self._match_r_paren(expression=this) 4254 return this 4255 4256 return None 4257 4258 def _parse_field( 4259 self, 4260 any_token: bool = False, 4261 tokens: t.Optional[t.Collection[TokenType]] = None, 4262 anonymous_func: bool = False, 4263 ) -> t.Optional[exp.Expression]: 4264 return ( 4265 self._parse_primary() 4266 or self._parse_function(anonymous=anonymous_func) 4267 or self._parse_id_var(any_token=any_token, tokens=tokens) 4268 ) 4269 4270 def _parse_function( 4271 self, 4272 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4273 anonymous: bool = False, 4274 optional_parens: bool = True, 4275 ) -> t.Optional[exp.Expression]: 4276 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4277 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4278 fn_syntax = False 4279 if ( 4280 self._match(TokenType.L_BRACE, advance=False) 4281 and self._next 4282 and self._next.text.upper() == "FN" 4283 ): 4284 self._advance(2) 4285 fn_syntax = True 4286 4287 func = self._parse_function_call( 4288 functions=functions, anonymous=anonymous, optional_parens=optional_parens 4289 ) 4290 4291 if fn_syntax: 4292 self._match(TokenType.R_BRACE) 4293 4294 return func 4295 4296 def _parse_function_call( 4297 self, 4298 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4299 anonymous: bool = False, 4300 optional_parens: bool = True, 4301 ) -> t.Optional[exp.Expression]: 4302 if not self._curr: 4303 return None 4304 4305 comments = self._curr.comments 4306 token_type = self._curr.token_type 4307 this = self._curr.text 4308 upper = this.upper() 4309 4310 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4311 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4312 self._advance() 4313 return self._parse_window(parser(self)) 4314 4315 if not self._next or self._next.token_type != TokenType.L_PAREN: 4316 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4317 self._advance() 4318 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4319 4320 return None 4321 4322 if token_type not in self.FUNC_TOKENS: 4323 return None 4324 4325 self._advance(2) 4326 4327 parser = self.FUNCTION_PARSERS.get(upper) 4328 if parser and not anonymous: 4329 this = parser(self) 4330 else: 4331 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4332 4333 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4334 this = self.expression(subquery_predicate, this=self._parse_select()) 4335 self._match_r_paren() 4336 return this 4337 4338 if functions is None: 4339 functions = self.FUNCTIONS 4340 4341 function = functions.get(upper) 4342 4343 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4344 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4345 4346 if alias: 4347 args = self._kv_to_prop_eq(args) 4348 4349 if function and not anonymous: 4350 if "dialect" in function.__code__.co_varnames: 4351 func = function(args, dialect=self.dialect) 4352 else: 4353 func = function(args) 4354 4355 func = self.validate_expression(func, args) 4356 if not self.dialect.NORMALIZE_FUNCTIONS: 4357 func.meta["name"] = this 4358 4359 this = func 4360 else: 4361 if token_type == TokenType.IDENTIFIER: 4362 this = exp.Identifier(this=this, quoted=True) 4363 this = self.expression(exp.Anonymous, this=this, expressions=args) 4364 
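# For illustration (MY_UDF is a hypothetical name): an unregistered call such
# as MY_UDF(a, b) falls through to the exp.Anonymous branch above, yielding
# Anonymous(this="MY_UDF", expressions=[...]), whereas a registered name is
# built via FUNCTIONS and then checked with validate_expression.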
4365 if isinstance(this, exp.Expression): 4366 this.add_comments(comments) 4367 4368 self._match_r_paren(this) 4369 return self._parse_window(this) 4370 4371 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4372 transformed = [] 4373 4374 for e in expressions: 4375 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4376 if isinstance(e, exp.Alias): 4377 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4378 4379 if not isinstance(e, exp.PropertyEQ): 4380 e = self.expression( 4381 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4382 ) 4383 4384 if isinstance(e.this, exp.Column): 4385 e.this.replace(e.this.this) 4386 4387 transformed.append(e) 4388 4389 return transformed 4390 4391 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4392 return self._parse_column_def(self._parse_id_var()) 4393 4394 def _parse_user_defined_function( 4395 self, kind: t.Optional[TokenType] = None 4396 ) -> t.Optional[exp.Expression]: 4397 this = self._parse_id_var() 4398 4399 while self._match(TokenType.DOT): 4400 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4401 4402 if not self._match(TokenType.L_PAREN): 4403 return this 4404 4405 expressions = self._parse_csv(self._parse_function_parameter) 4406 self._match_r_paren() 4407 return self.expression( 4408 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4409 ) 4410 4411 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4412 literal = self._parse_primary() 4413 if literal: 4414 return self.expression(exp.Introducer, this=token.text, expression=literal) 4415 4416 return self.expression(exp.Identifier, this=token.text) 4417 4418 def _parse_session_parameter(self) -> exp.SessionParameter: 4419 kind = None 4420 this = self._parse_id_var() or self._parse_primary() 4421 4422 if this and self._match(TokenType.DOT): 4423 kind = this.name 4424 this = self._parse_var() or self._parse_primary() 4425 4426 return self.expression(exp.SessionParameter, this=this, kind=kind) 4427 4428 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4429 index = self._index 4430 4431 if self._match(TokenType.L_PAREN): 4432 expressions = t.cast( 4433 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4434 ) 4435 4436 if not self._match(TokenType.R_PAREN): 4437 self._retreat(index) 4438 else: 4439 expressions = [self._parse_id_var()] 4440 4441 if self._match_set(self.LAMBDAS): 4442 return self.LAMBDAS[self._prev.token_type](self, expressions) 4443 4444 self._retreat(index) 4445 4446 this: t.Optional[exp.Expression] 4447 4448 if self._match(TokenType.DISTINCT): 4449 this = self.expression( 4450 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4451 ) 4452 else: 4453 this = self._parse_select_or_expression(alias=alias) 4454 4455 return self._parse_limit( 4456 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4457 ) 4458 4459 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4460 index = self._index 4461 4462 if not self._match(TokenType.L_PAREN): 4463 return this 4464 4465 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 4466 # expr can be of both types 4467 if self._match_set(self.SELECT_START_TOKENS): 4468 self._retreat(index) 4469 return this 4470 4471 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4472 4473 self._match_r_paren() 4474 return self.expression(exp.Schema, this=this, expressions=args) 4475 4476 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4477 return self._parse_column_def(self._parse_field(any_token=True)) 4478 4479 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4480 # column defs are not really columns, they're identifiers 4481 if isinstance(this, exp.Column): 4482 this = this.this 4483 4484 kind = self._parse_types(schema=True) 4485 4486 if self._match_text_seq("FOR", "ORDINALITY"): 4487 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4488 4489 constraints: t.List[exp.Expression] = [] 4490 4491 if not kind and self._match(TokenType.ALIAS): 4492 constraints.append( 4493 self.expression( 4494 exp.ComputedColumnConstraint, 4495 this=self._parse_conjunction(), 4496 persisted=self._match_text_seq("PERSISTED"), 4497 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4498 ) 4499 ) 4500 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4501 self._match(TokenType.ALIAS) 4502 constraints.append( 4503 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4504 ) 4505 4506 while True: 4507 constraint = self._parse_column_constraint() 4508 if not constraint: 4509 break 4510 constraints.append(constraint) 4511 4512 if not kind and not constraints: 4513 return this 4514 4515 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4516 4517 def _parse_auto_increment( 4518 self, 4519 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4520 start = None 4521 increment = None 4522 4523 if self._match(TokenType.L_PAREN, advance=False): 4524 args = self._parse_wrapped_csv(self._parse_bitwise) 4525 start = seq_get(args, 0) 4526 increment = seq_get(args, 1) 4527 elif self._match_text_seq("START"): 4528 start = self._parse_bitwise() 4529 self._match_text_seq("INCREMENT") 4530 increment = self._parse_bitwise() 4531 4532 if start and increment: 4533 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4534 4535 return exp.AutoIncrementColumnConstraint() 4536 4537 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4538 if not self._match_text_seq("REFRESH"): 4539 self._retreat(self._index - 1) 4540 return None 4541 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4542 4543 def _parse_compress(self) -> exp.CompressColumnConstraint: 4544 if self._match(TokenType.L_PAREN, advance=False): 4545 return self.expression( 4546 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4547 ) 4548 4549 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4550 4551 def _parse_generated_as_identity( 4552 self, 4553 ) -> ( 4554 exp.GeneratedAsIdentityColumnConstraint 4555 | exp.ComputedColumnConstraint 4556 | exp.GeneratedAsRowColumnConstraint 4557 ): 4558 if self._match_text_seq("BY", "DEFAULT"): 4559 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4560 this = self.expression( 4561 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4562 ) 4563 else: 4564 self._match_text_seq("ALWAYS") 4565 this = 
self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4566 4567 self._match(TokenType.ALIAS) 4568 4569 if self._match_text_seq("ROW"): 4570 start = self._match_text_seq("START") 4571 if not start: 4572 self._match(TokenType.END) 4573 hidden = self._match_text_seq("HIDDEN") 4574 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4575 4576 identity = self._match_text_seq("IDENTITY") 4577 4578 if self._match(TokenType.L_PAREN): 4579 if self._match(TokenType.START_WITH): 4580 this.set("start", self._parse_bitwise()) 4581 if self._match_text_seq("INCREMENT", "BY"): 4582 this.set("increment", self._parse_bitwise()) 4583 if self._match_text_seq("MINVALUE"): 4584 this.set("minvalue", self._parse_bitwise()) 4585 if self._match_text_seq("MAXVALUE"): 4586 this.set("maxvalue", self._parse_bitwise()) 4587 4588 if self._match_text_seq("CYCLE"): 4589 this.set("cycle", True) 4590 elif self._match_text_seq("NO", "CYCLE"): 4591 this.set("cycle", False) 4592 4593 if not identity: 4594 this.set("expression", self._parse_bitwise()) 4595 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4596 args = self._parse_csv(self._parse_bitwise) 4597 this.set("start", seq_get(args, 0)) 4598 this.set("increment", seq_get(args, 1)) 4599 4600 self._match_r_paren() 4601 4602 return this 4603 4604 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4605 self._match_text_seq("LENGTH") 4606 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4607 4608 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4609 if self._match_text_seq("NULL"): 4610 return self.expression(exp.NotNullColumnConstraint) 4611 if self._match_text_seq("CASESPECIFIC"): 4612 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4613 if self._match_text_seq("FOR", "REPLICATION"): 4614 return self.expression(exp.NotForReplicationColumnConstraint) 4615 return None 4616 4617 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4618 if self._match(TokenType.CONSTRAINT): 4619 this = self._parse_id_var() 4620 else: 4621 this = None 4622 4623 if self._match_texts(self.CONSTRAINT_PARSERS): 4624 return self.expression( 4625 exp.ColumnConstraint, 4626 this=this, 4627 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4628 ) 4629 4630 return this 4631 4632 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4633 if not self._match(TokenType.CONSTRAINT): 4634 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4635 4636 return self.expression( 4637 exp.Constraint, 4638 this=self._parse_id_var(), 4639 expressions=self._parse_unnamed_constraints(), 4640 ) 4641 4642 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 4643 constraints = [] 4644 while True: 4645 constraint = self._parse_unnamed_constraint() or self._parse_function() 4646 if not constraint: 4647 break 4648 constraints.append(constraint) 4649 4650 return constraints 4651 4652 def _parse_unnamed_constraint( 4653 self, constraints: t.Optional[t.Collection[str]] = None 4654 ) -> t.Optional[exp.Expression]: 4655 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4656 constraints or self.CONSTRAINT_PARSERS 4657 ): 4658 return None 4659 4660 constraint = self._prev.text.upper() 4661 if constraint not in self.CONSTRAINT_PARSERS: 4662 self.raise_error(f"No parser found for schema constraint {constraint}.") 4663 4664 return self.CONSTRAINT_PARSERS[constraint](self) 4665 4666 
def _parse_unique(self) -> exp.UniqueColumnConstraint: 4667 self._match_text_seq("KEY") 4668 return self.expression( 4669 exp.UniqueColumnConstraint, 4670 this=self._parse_schema(self._parse_id_var(any_token=False)), 4671 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4672 on_conflict=self._parse_on_conflict(), 4673 ) 4674 4675 def _parse_key_constraint_options(self) -> t.List[str]: 4676 options = [] 4677 while True: 4678 if not self._curr: 4679 break 4680 4681 if self._match(TokenType.ON): 4682 action = None 4683 on = self._advance_any() and self._prev.text 4684 4685 if self._match_text_seq("NO", "ACTION"): 4686 action = "NO ACTION" 4687 elif self._match_text_seq("CASCADE"): 4688 action = "CASCADE" 4689 elif self._match_text_seq("RESTRICT"): 4690 action = "RESTRICT" 4691 elif self._match_pair(TokenType.SET, TokenType.NULL): 4692 action = "SET NULL" 4693 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4694 action = "SET DEFAULT" 4695 else: 4696 self.raise_error("Invalid key constraint") 4697 4698 options.append(f"ON {on} {action}") 4699 elif self._match_text_seq("NOT", "ENFORCED"): 4700 options.append("NOT ENFORCED") 4701 elif self._match_text_seq("DEFERRABLE"): 4702 options.append("DEFERRABLE") 4703 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4704 options.append("INITIALLY DEFERRED") 4705 elif self._match_text_seq("NORELY"): 4706 options.append("NORELY") 4707 elif self._match_text_seq("MATCH", "FULL"): 4708 options.append("MATCH FULL") 4709 else: 4710 break 4711 4712 return options 4713 4714 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4715 if match and not self._match(TokenType.REFERENCES): 4716 return None 4717 4718 expressions = None 4719 this = self._parse_table(schema=True) 4720 options = self._parse_key_constraint_options() 4721 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4722 4723 def _parse_foreign_key(self) -> exp.ForeignKey: 4724 expressions = self._parse_wrapped_id_vars() 4725 reference = self._parse_references() 4726 options = {} 4727 4728 while self._match(TokenType.ON): 4729 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4730 self.raise_error("Expected DELETE or UPDATE") 4731 4732 kind = self._prev.text.lower() 4733 4734 if self._match_text_seq("NO", "ACTION"): 4735 action = "NO ACTION" 4736 elif self._match(TokenType.SET): 4737 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4738 action = "SET " + self._prev.text.upper() 4739 else: 4740 self._advance() 4741 action = self._prev.text.upper() 4742 4743 options[kind] = action 4744 4745 return self.expression( 4746 exp.ForeignKey, 4747 expressions=expressions, 4748 reference=reference, 4749 **options, # type: ignore 4750 ) 4751 4752 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4753 return self._parse_field() 4754 4755 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4756 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4757 self._retreat(self._index - 1) 4758 return None 4759 4760 id_vars = self._parse_wrapped_id_vars() 4761 return self.expression( 4762 exp.PeriodForSystemTimeConstraint, 4763 this=seq_get(id_vars, 0), 4764 expression=seq_get(id_vars, 1), 4765 ) 4766 4767 def _parse_primary_key( 4768 self, wrapped_optional: bool = False, in_props: bool = False 4769 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4770 desc = ( 4771 self._match_set((TokenType.ASC, TokenType.DESC)) 4772 and self._prev.token_type 
== TokenType.DESC 4773 ) 4774 4775 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4776 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4777 4778 expressions = self._parse_wrapped_csv( 4779 self._parse_primary_key_part, optional=wrapped_optional 4780 ) 4781 options = self._parse_key_constraint_options() 4782 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4783 4784 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4785 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4786 4787 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4788 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4789 return this 4790 4791 bracket_kind = self._prev.token_type 4792 expressions = self._parse_csv( 4793 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4794 ) 4795 4796 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 4797 self.raise_error("Expected ]") 4798 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 4799 self.raise_error("Expected }") 4800 4801 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4802 if bracket_kind == TokenType.L_BRACE: 4803 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 4804 elif not this or this.name.upper() == "ARRAY": 4805 this = self.expression(exp.Array, expressions=expressions) 4806 else: 4807 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4808 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4809 4810 self._add_comments(this) 4811 return self._parse_bracket(this) 4812 4813 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4814 if self._match(TokenType.COLON): 4815 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4816 return this 4817 4818 def _parse_case(self) -> t.Optional[exp.Expression]: 4819 ifs = [] 4820 default = None 4821 4822 comments = self._prev_comments 4823 expression = self._parse_conjunction() 4824 4825 while self._match(TokenType.WHEN): 4826 this = self._parse_conjunction() 4827 self._match(TokenType.THEN) 4828 then = self._parse_conjunction() 4829 ifs.append(self.expression(exp.If, this=this, true=then)) 4830 4831 if self._match(TokenType.ELSE): 4832 default = self._parse_conjunction() 4833 4834 if not self._match(TokenType.END): 4835 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4836 default = exp.column("interval") 4837 else: 4838 self.raise_error("Expected END after CASE", self._prev) 4839 4840 return self.expression( 4841 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 4842 ) 4843 4844 def _parse_if(self) -> t.Optional[exp.Expression]: 4845 if self._match(TokenType.L_PAREN): 4846 args = self._parse_csv(self._parse_conjunction) 4847 this = self.validate_expression(exp.If.from_arg_list(args), args) 4848 self._match_r_paren() 4849 else: 4850 index = self._index - 1 4851 4852 if self.NO_PAREN_IF_COMMANDS and index == 0: 4853 return self._parse_as_command(self._prev) 4854 4855 condition = self._parse_conjunction() 4856 4857 if not condition: 4858 self._retreat(index) 4859 return None 4860 4861 self._match(TokenType.THEN) 4862 true = self._parse_conjunction() 4863 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4864 
self._match(TokenType.END) 4865 this = self.expression(exp.If, this=condition, true=true, false=false) 4866 4867 return this 4868 4869 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4870 if not self._match_text_seq("VALUE", "FOR"): 4871 self._retreat(self._index - 1) 4872 return None 4873 4874 return self.expression( 4875 exp.NextValueFor, 4876 this=self._parse_column(), 4877 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4878 ) 4879 4880 def _parse_extract(self) -> exp.Extract: 4881 this = self._parse_function() or self._parse_var() or self._parse_type() 4882 4883 if self._match(TokenType.FROM): 4884 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4885 4886 if not self._match(TokenType.COMMA): 4887 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4888 4889 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4890 4891 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4892 this = self._parse_conjunction() 4893 4894 if not self._match(TokenType.ALIAS): 4895 if self._match(TokenType.COMMA): 4896 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4897 4898 self.raise_error("Expected AS after CAST") 4899 4900 fmt = None 4901 to = self._parse_types() 4902 4903 if self._match(TokenType.FORMAT): 4904 fmt_string = self._parse_string() 4905 fmt = self._parse_at_time_zone(fmt_string) 4906 4907 if not to: 4908 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4909 if to.this in exp.DataType.TEMPORAL_TYPES: 4910 this = self.expression( 4911 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4912 this=this, 4913 format=exp.Literal.string( 4914 format_time( 4915 fmt_string.this if fmt_string else "", 4916 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 4917 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 4918 ) 4919 ), 4920 ) 4921 4922 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4923 this.set("zone", fmt.args["zone"]) 4924 return this 4925 elif not to: 4926 self.raise_error("Expected TYPE after CAST") 4927 elif isinstance(to, exp.Identifier): 4928 to = exp.DataType.build(to.name, udt=True) 4929 elif to.this == exp.DataType.Type.CHAR: 4930 if self._match(TokenType.CHARACTER_SET): 4931 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4932 4933 return self.expression( 4934 exp.Cast if strict else exp.TryCast, 4935 this=this, 4936 to=to, 4937 format=fmt, 4938 safe=safe, 4939 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 4940 ) 4941 4942 def _parse_string_agg(self) -> exp.Expression: 4943 if self._match(TokenType.DISTINCT): 4944 args: t.List[t.Optional[exp.Expression]] = [ 4945 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4946 ] 4947 if self._match(TokenType.COMMA): 4948 args.extend(self._parse_csv(self._parse_conjunction)) 4949 else: 4950 args = self._parse_csv(self._parse_conjunction) # type: ignore 4951 4952 index = self._index 4953 if not self._match(TokenType.R_PAREN) and args: 4954 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4955 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... 
]] [LIMIT n]) 4956 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4957 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4958 4959 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4960 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4961 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4962 if not self._match_text_seq("WITHIN", "GROUP"): 4963 self._retreat(index) 4964 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4965 4966 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4967 order = self._parse_order(this=seq_get(args, 0)) 4968 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4969 4970 def _parse_convert( 4971 self, strict: bool, safe: t.Optional[bool] = None 4972 ) -> t.Optional[exp.Expression]: 4973 this = self._parse_bitwise() 4974 4975 if self._match(TokenType.USING): 4976 to: t.Optional[exp.Expression] = self.expression( 4977 exp.CharacterSet, this=self._parse_var() 4978 ) 4979 elif self._match(TokenType.COMMA): 4980 to = self._parse_types() 4981 else: 4982 to = None 4983 4984 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4985 4986 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4987 """ 4988 There are generally two variants of the DECODE function: 4989 4990 - DECODE(bin, charset) 4991 - DECODE(expression, search, result [, search, result] ... [, default]) 4992 4993 The second variant will always be parsed into a CASE expression. Note that NULL 4994 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4995 instead of relying on pattern matching. 
4996 """ 4997 args = self._parse_csv(self._parse_conjunction) 4998 4999 if len(args) < 3: 5000 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5001 5002 expression, *expressions = args 5003 if not expression: 5004 return None 5005 5006 ifs = [] 5007 for search, result in zip(expressions[::2], expressions[1::2]): 5008 if not search or not result: 5009 return None 5010 5011 if isinstance(search, exp.Literal): 5012 ifs.append( 5013 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5014 ) 5015 elif isinstance(search, exp.Null): 5016 ifs.append( 5017 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5018 ) 5019 else: 5020 cond = exp.or_( 5021 exp.EQ(this=expression.copy(), expression=search), 5022 exp.and_( 5023 exp.Is(this=expression.copy(), expression=exp.Null()), 5024 exp.Is(this=search.copy(), expression=exp.Null()), 5025 copy=False, 5026 ), 5027 copy=False, 5028 ) 5029 ifs.append(exp.If(this=cond, true=result)) 5030 5031 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5032 5033 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5034 self._match_text_seq("KEY") 5035 key = self._parse_column() 5036 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5037 self._match_text_seq("VALUE") 5038 value = self._parse_bitwise() 5039 5040 if not key and not value: 5041 return None 5042 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5043 5044 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5045 if not this or not self._match_text_seq("FORMAT", "JSON"): 5046 return this 5047 5048 return self.expression(exp.FormatJson, this=this) 5049 5050 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5051 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5052 for value in values: 5053 if self._match_text_seq(value, "ON", on): 5054 return f"{value} ON {on}" 5055 5056 return None 5057 5058 @t.overload 5059 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5060 5061 @t.overload 5062 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5063 5064 def _parse_json_object(self, agg=False): 5065 star = self._parse_star() 5066 expressions = ( 5067 [star] 5068 if star 5069 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5070 ) 5071 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5072 5073 unique_keys = None 5074 if self._match_text_seq("WITH", "UNIQUE"): 5075 unique_keys = True 5076 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5077 unique_keys = False 5078 5079 self._match_text_seq("KEYS") 5080 5081 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5082 self._parse_type() 5083 ) 5084 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5085 5086 return self.expression( 5087 exp.JSONObjectAgg if agg else exp.JSONObject, 5088 expressions=expressions, 5089 null_handling=null_handling, 5090 unique_keys=unique_keys, 5091 return_type=return_type, 5092 encoding=encoding, 5093 ) 5094 5095 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5096 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5097 if not self._match_text_seq("NESTED"): 5098 this = self._parse_id_var() 5099 kind = self._parse_types(allow_identifiers=False) 5100 nested = None 5101 else: 5102 this = None 5103 kind = None 5104 nested = True 5105 5106 path = self._match_text_seq("PATH") and self._parse_string() 5107 nested_schema = nested and self._parse_json_schema() 5108 5109 return self.expression( 5110 exp.JSONColumnDef, 5111 this=this, 5112 kind=kind, 5113 path=path, 5114 nested_schema=nested_schema, 5115 ) 5116 5117 def _parse_json_schema(self) -> exp.JSONSchema: 5118 self._match_text_seq("COLUMNS") 5119 return self.expression( 5120 exp.JSONSchema, 5121 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5122 ) 5123 5124 def _parse_json_table(self) -> exp.JSONTable: 5125 this = self._parse_format_json(self._parse_bitwise()) 5126 path = self._match(TokenType.COMMA) and self._parse_string() 5127 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5128 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5129 schema = self._parse_json_schema() 5130 5131 return exp.JSONTable( 5132 this=this, 5133 schema=schema, 5134 path=path, 5135 error_handling=error_handling, 5136 empty_handling=empty_handling, 5137 ) 5138 5139 def _parse_match_against(self) -> exp.MatchAgainst: 5140 expressions = self._parse_csv(self._parse_column) 5141 5142 self._match_text_seq(")", "AGAINST", "(") 5143 5144 this = self._parse_string() 5145 5146 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5147 modifier = "IN NATURAL LANGUAGE MODE" 5148 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5149 modifier = f"{modifier} WITH QUERY EXPANSION" 5150 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5151 modifier = "IN BOOLEAN MODE" 5152 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5153 modifier = "WITH QUERY EXPANSION" 5154 else: 5155 modifier = None 5156 5157 return self.expression( 5158 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5159 ) 5160 5161 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5162 def _parse_open_json(self) -> exp.OpenJSON: 5163 this = self._parse_bitwise() 5164 path = self._match(TokenType.COMMA) and self._parse_string() 5165 5166 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5167 this = self._parse_field(any_token=True) 5168 kind = self._parse_types() 5169 path = 
self._parse_string() 5170 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5171 5172 return self.expression( 5173 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5174 ) 5175 5176 expressions = None 5177 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5178 self._match_l_paren() 5179 expressions = self._parse_csv(_parse_open_json_column_def) 5180 5181 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5182 5183 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5184 args = self._parse_csv(self._parse_bitwise) 5185 5186 if self._match(TokenType.IN): 5187 return self.expression( 5188 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5189 ) 5190 5191 if haystack_first: 5192 haystack = seq_get(args, 0) 5193 needle = seq_get(args, 1) 5194 else: 5195 needle = seq_get(args, 0) 5196 haystack = seq_get(args, 1) 5197 5198 return self.expression( 5199 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5200 ) 5201 5202 def _parse_predict(self) -> exp.Predict: 5203 self._match_text_seq("MODEL") 5204 this = self._parse_table() 5205 5206 self._match(TokenType.COMMA) 5207 self._match_text_seq("TABLE") 5208 5209 return self.expression( 5210 exp.Predict, 5211 this=this, 5212 expression=self._parse_table(), 5213 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5214 ) 5215 5216 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5217 args = self._parse_csv(self._parse_table) 5218 return exp.JoinHint(this=func_name.upper(), expressions=args) 5219 5220 def _parse_substring(self) -> exp.Substring: 5221 # Postgres supports the form: substring(string [from int] [for int]) 5222 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5223 5224 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5225 5226 if self._match(TokenType.FROM): 5227 args.append(self._parse_bitwise()) 5228 if self._match(TokenType.FOR): 5229 args.append(self._parse_bitwise()) 5230 5231 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5232 5233 def _parse_trim(self) -> exp.Trim: 5234 # https://www.w3resource.com/sql/character-functions/trim.php 5235 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5236 5237 position = None 5238 collation = None 5239 expression = None 5240 5241 if self._match_texts(self.TRIM_TYPES): 5242 position = self._prev.text.upper() 5243 5244 this = self._parse_bitwise() 5245 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5246 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5247 expression = self._parse_bitwise() 5248 5249 if invert_order: 5250 this, expression = expression, this 5251 5252 if self._match(TokenType.COLLATE): 5253 collation = self._parse_bitwise() 5254 5255 return self.expression( 5256 exp.Trim, this=this, position=position, expression=expression, collation=collation 5257 ) 5258 5259 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5260 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5261 5262 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5263 return self._parse_window(self._parse_id_var(), alias=True) 5264 5265 def _parse_respect_or_ignore_nulls( 5266 self, this: t.Optional[exp.Expression] 5267 ) -> t.Optional[exp.Expression]: 5268 if self._match_text_seq("IGNORE", "NULLS"): 5269 return self.expression(exp.IgnoreNulls, this=this) 5270 if 
self._match_text_seq("RESPECT", "NULLS"): 5271 return self.expression(exp.RespectNulls, this=this) 5272 return this 5273 5274 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5275 if self._match(TokenType.HAVING): 5276 self._match_texts(("MAX", "MIN")) 5277 max = self._prev.text.upper() != "MIN" 5278 return self.expression( 5279 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5280 ) 5281 5282 return this 5283 5284 def _parse_window( 5285 self, this: t.Optional[exp.Expression], alias: bool = False 5286 ) -> t.Optional[exp.Expression]: 5287 func = this 5288 comments = func.comments if isinstance(func, exp.Expression) else None 5289 5290 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5291 self._match(TokenType.WHERE) 5292 this = self.expression( 5293 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5294 ) 5295 self._match_r_paren() 5296 5297 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5298 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5299 if self._match_text_seq("WITHIN", "GROUP"): 5300 order = self._parse_wrapped(self._parse_order) 5301 this = self.expression(exp.WithinGroup, this=this, expression=order) 5302 5303 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5304 # Some dialects choose to implement and some do not. 5305 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5306 5307 # There is some code above in _parse_lambda that handles 5308 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5309 5310 # The below changes handle 5311 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5312 5313 # Oracle allows both formats 5314 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5315 # and Snowflake chose to do the same for familiarity 5316 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5317 if isinstance(this, exp.AggFunc): 5318 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5319 5320 if ignore_respect and ignore_respect is not this: 5321 ignore_respect.replace(ignore_respect.this) 5322 this = self.expression(ignore_respect.__class__, this=this) 5323 5324 this = self._parse_respect_or_ignore_nulls(this) 5325 5326 # bigquery select from window x AS (partition by ...) 
5327 if alias: 5328 over = None 5329 self._match(TokenType.ALIAS) 5330 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5331 return this 5332 else: 5333 over = self._prev.text.upper() 5334 5335 if comments: 5336 func.comments = None # type: ignore 5337 5338 if not self._match(TokenType.L_PAREN): 5339 return self.expression( 5340 exp.Window, 5341 comments=comments, 5342 this=this, 5343 alias=self._parse_id_var(False), 5344 over=over, 5345 ) 5346 5347 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5348 5349 first = self._match(TokenType.FIRST) 5350 if self._match_text_seq("LAST"): 5351 first = False 5352 5353 partition, order = self._parse_partition_and_order() 5354 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5355 5356 if kind: 5357 self._match(TokenType.BETWEEN) 5358 start = self._parse_window_spec() 5359 self._match(TokenType.AND) 5360 end = self._parse_window_spec() 5361 5362 spec = self.expression( 5363 exp.WindowSpec, 5364 kind=kind, 5365 start=start["value"], 5366 start_side=start["side"], 5367 end=end["value"], 5368 end_side=end["side"], 5369 ) 5370 else: 5371 spec = None 5372 5373 self._match_r_paren() 5374 5375 window = self.expression( 5376 exp.Window, 5377 comments=comments, 5378 this=this, 5379 partition_by=partition, 5380 order=order, 5381 spec=spec, 5382 alias=window_alias, 5383 over=over, 5384 first=first, 5385 ) 5386 5387 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5388 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5389 return self._parse_window(window, alias=alias) 5390 5391 return window 5392 5393 def _parse_partition_and_order( 5394 self, 5395 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5396 return self._parse_partition_by(), self._parse_order() 5397 5398 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5399 self._match(TokenType.BETWEEN) 5400 5401 return { 5402 "value": ( 5403 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5404 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5405 or self._parse_bitwise() 5406 ), 5407 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5408 } 5409 5410 def _parse_alias( 5411 self, this: t.Optional[exp.Expression], explicit: bool = False 5412 ) -> t.Optional[exp.Expression]: 5413 any_token = self._match(TokenType.ALIAS) 5414 comments = self._prev_comments 5415 5416 if explicit and not any_token: 5417 return this 5418 5419 if self._match(TokenType.L_PAREN): 5420 aliases = self.expression( 5421 exp.Aliases, 5422 comments=comments, 5423 this=this, 5424 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5425 ) 5426 self._match_r_paren(aliases) 5427 return aliases 5428 5429 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5430 self.STRING_ALIASES and self._parse_string_as_identifier() 5431 ) 5432 5433 if alias: 5434 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5435 column = this.this 5436 5437 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5438 if not this.comments and column and column.comments: 5439 this.comments = column.comments 5440 column.comments = None 5441 5442 return this 5443 5444 def _parse_id_var( 5445 self, 5446 any_token: bool = True, 5447 tokens: t.Optional[t.Collection[TokenType]] = None, 5448 ) -> t.Optional[exp.Expression]: 5449 identifier = self._parse_identifier() 5450 5451 if identifier: 5452 return identifier 5453 
5454 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 5455 quoted = self._prev.token_type == TokenType.STRING 5456 return exp.Identifier(this=self._prev.text, quoted=quoted) 5457 5458 return None 5459 5460 def _parse_string(self) -> t.Optional[exp.Expression]: 5461 if self._match_set(self.STRING_PARSERS): 5462 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5463 return self._parse_placeholder() 5464 5465 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5466 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5467 5468 def _parse_number(self) -> t.Optional[exp.Expression]: 5469 if self._match_set(self.NUMERIC_PARSERS): 5470 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5471 return self._parse_placeholder() 5472 5473 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5474 if self._match(TokenType.IDENTIFIER): 5475 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5476 return self._parse_placeholder() 5477 5478 def _parse_var( 5479 self, 5480 any_token: bool = False, 5481 tokens: t.Optional[t.Collection[TokenType]] = None, 5482 upper: bool = False, 5483 ) -> t.Optional[exp.Expression]: 5484 if ( 5485 (any_token and self._advance_any()) 5486 or self._match(TokenType.VAR) 5487 or (self._match_set(tokens) if tokens else False) 5488 ): 5489 return self.expression( 5490 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5491 ) 5492 return self._parse_placeholder() 5493 5494 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5495 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5496 self._advance() 5497 return self._prev 5498 return None 5499 5500 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5501 return self._parse_var() or self._parse_string() 5502 5503 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5504 return self._parse_primary() or self._parse_var(any_token=True) 5505 5506 def _parse_null(self) -> t.Optional[exp.Expression]: 5507 if self._match_set(self.NULL_TOKENS): 5508 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5509 return self._parse_placeholder() 5510 5511 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5512 if self._match(TokenType.TRUE): 5513 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5514 if self._match(TokenType.FALSE): 5515 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5516 return self._parse_placeholder() 5517 5518 def _parse_star(self) -> t.Optional[exp.Expression]: 5519 if self._match(TokenType.STAR): 5520 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5521 return self._parse_placeholder() 5522 5523 def _parse_parameter(self) -> exp.Parameter: 5524 self._match(TokenType.L_BRACE) 5525 this = self._parse_identifier() or self._parse_primary_or_var() 5526 expression = self._match(TokenType.COLON) and ( 5527 self._parse_identifier() or self._parse_primary_or_var() 5528 ) 5529 self._match(TokenType.R_BRACE) 5530 return self.expression(exp.Parameter, this=this, expression=expression) 5531 5532 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5533 if self._match_set(self.PLACEHOLDER_PARSERS): 5534 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5535 if placeholder: 5536 return placeholder 5537 self._advance(-1) 5538 return None 5539 5540 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 
5541 if not self._match(TokenType.EXCEPT): 5542 return None 5543 if self._match(TokenType.L_PAREN, advance=False): 5544 return self._parse_wrapped_csv(self._parse_column) 5545 5546 except_column = self._parse_column() 5547 return [except_column] if except_column else None 5548 5549 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5550 if not self._match(TokenType.REPLACE): 5551 return None 5552 if self._match(TokenType.L_PAREN, advance=False): 5553 return self._parse_wrapped_csv(self._parse_expression) 5554 5555 replace_expression = self._parse_expression() 5556 return [replace_expression] if replace_expression else None 5557 5558 def _parse_csv( 5559 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5560 ) -> t.List[exp.Expression]: 5561 parse_result = parse_method() 5562 items = [parse_result] if parse_result is not None else [] 5563 5564 while self._match(sep): 5565 self._add_comments(parse_result) 5566 parse_result = parse_method() 5567 if parse_result is not None: 5568 items.append(parse_result) 5569 5570 return items 5571 5572 def _parse_tokens( 5573 self, parse_method: t.Callable, expressions: t.Dict 5574 ) -> t.Optional[exp.Expression]: 5575 this = parse_method() 5576 5577 while self._match_set(expressions): 5578 this = self.expression( 5579 expressions[self._prev.token_type], 5580 this=this, 5581 comments=self._prev_comments, 5582 expression=parse_method(), 5583 ) 5584 5585 return this 5586 5587 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5588 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5589 5590 def _parse_wrapped_csv( 5591 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5592 ) -> t.List[exp.Expression]: 5593 return self._parse_wrapped( 5594 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5595 ) 5596 5597 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5598 wrapped = self._match(TokenType.L_PAREN) 5599 if not wrapped and not optional: 5600 self.raise_error("Expecting (") 5601 parse_result = parse_method() 5602 if wrapped: 5603 self._match_r_paren() 5604 return parse_result 5605 5606 def _parse_expressions(self) -> t.List[exp.Expression]: 5607 return self._parse_csv(self._parse_expression) 5608 5609 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5610 return self._parse_select() or self._parse_set_operations( 5611 self._parse_expression() if alias else self._parse_conjunction() 5612 ) 5613 5614 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5615 return self._parse_query_modifiers( 5616 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5617 ) 5618 5619 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5620 this = None 5621 if self._match_texts(self.TRANSACTION_KIND): 5622 this = self._prev.text 5623 5624 self._match_texts(("TRANSACTION", "WORK")) 5625 5626 modes = [] 5627 while True: 5628 mode = [] 5629 while self._match(TokenType.VAR): 5630 mode.append(self._prev.text) 5631 5632 if mode: 5633 modes.append(" ".join(mode)) 5634 if not self._match(TokenType.COMMA): 5635 break 5636 5637 return self.expression(exp.Transaction, this=this, modes=modes) 5638 5639 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5640 chain = None 5641 savepoint = None 5642 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5643 5644 self._match_texts(("TRANSACTION", "WORK")) 5645 5646 if 
self._match_text_seq("TO"): 5647 self._match_text_seq("SAVEPOINT") 5648 savepoint = self._parse_id_var() 5649 5650 if self._match(TokenType.AND): 5651 chain = not self._match_text_seq("NO") 5652 self._match_text_seq("CHAIN") 5653 5654 if is_rollback: 5655 return self.expression(exp.Rollback, savepoint=savepoint) 5656 5657 return self.expression(exp.Commit, chain=chain) 5658 5659 def _parse_refresh(self) -> exp.Refresh: 5660 self._match(TokenType.TABLE) 5661 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5662 5663 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5664 if not self._match_text_seq("ADD"): 5665 return None 5666 5667 self._match(TokenType.COLUMN) 5668 exists_column = self._parse_exists(not_=True) 5669 expression = self._parse_field_def() 5670 5671 if expression: 5672 expression.set("exists", exists_column) 5673 5674 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5675 if self._match_texts(("FIRST", "AFTER")): 5676 position = self._prev.text 5677 column_position = self.expression( 5678 exp.ColumnPosition, this=self._parse_column(), position=position 5679 ) 5680 expression.set("position", column_position) 5681 5682 return expression 5683 5684 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5685 drop = self._match(TokenType.DROP) and self._parse_drop() 5686 if drop and not isinstance(drop, exp.Command): 5687 drop.set("kind", drop.args.get("kind", "COLUMN")) 5688 return drop 5689 5690 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5691 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5692 return self.expression( 5693 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5694 ) 5695 5696 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5697 index = self._index - 1 5698 5699 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5700 return self._parse_csv( 5701 lambda: self.expression( 5702 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5703 ) 5704 ) 5705 5706 self._retreat(index) 5707 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5708 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5709 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5710 5711 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5712 self._match(TokenType.COLUMN) 5713 column = self._parse_field(any_token=True) 5714 5715 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5716 return self.expression(exp.AlterColumn, this=column, drop=True) 5717 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5718 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5719 if self._match(TokenType.COMMENT): 5720 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5721 5722 self._match_text_seq("SET", "DATA") 5723 self._match_text_seq("TYPE") 5724 return self.expression( 5725 exp.AlterColumn, 5726 this=column, 5727 dtype=self._parse_types(), 5728 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5729 using=self._match(TokenType.USING) and self._parse_conjunction(), 5730 ) 5731 5732 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5733 index = self._index - 1 5734 5735 partition_exists = self._parse_exists() 5736 if self._match(TokenType.PARTITION, advance=False): 5737 return self._parse_csv(lambda: 
self._parse_drop_partition(exists=partition_exists)) 5738 5739 self._retreat(index) 5740 return self._parse_csv(self._parse_drop_column) 5741 5742 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5743 if self._match(TokenType.COLUMN): 5744 exists = self._parse_exists() 5745 old_column = self._parse_column() 5746 to = self._match_text_seq("TO") 5747 new_column = self._parse_column() 5748 5749 if old_column is None or to is None or new_column is None: 5750 return None 5751 5752 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5753 5754 self._match_text_seq("TO") 5755 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5756 5757 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5758 start = self._prev 5759 5760 if not self._match(TokenType.TABLE): 5761 return self._parse_as_command(start) 5762 5763 exists = self._parse_exists() 5764 only = self._match_text_seq("ONLY") 5765 this = self._parse_table(schema=True) 5766 5767 if self._next: 5768 self._advance() 5769 5770 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5771 if parser: 5772 actions = ensure_list(parser(self)) 5773 options = self._parse_csv(self._parse_property) 5774 5775 if not self._curr and actions: 5776 return self.expression( 5777 exp.AlterTable, 5778 this=this, 5779 exists=exists, 5780 actions=actions, 5781 only=only, 5782 options=options, 5783 ) 5784 5785 return self._parse_as_command(start) 5786 5787 def _parse_merge(self) -> exp.Merge: 5788 self._match(TokenType.INTO) 5789 target = self._parse_table() 5790 5791 if target and self._match(TokenType.ALIAS, advance=False): 5792 target.set("alias", self._parse_table_alias()) 5793 5794 self._match(TokenType.USING) 5795 using = self._parse_table() 5796 5797 self._match(TokenType.ON) 5798 on = self._parse_conjunction() 5799 5800 return self.expression( 5801 exp.Merge, 5802 this=target, 5803 using=using, 5804 on=on, 5805 expressions=self._parse_when_matched(), 5806 ) 5807 5808 def _parse_when_matched(self) -> t.List[exp.When]: 5809 whens = [] 5810 5811 while self._match(TokenType.WHEN): 5812 matched = not self._match(TokenType.NOT) 5813 self._match_text_seq("MATCHED") 5814 source = ( 5815 False 5816 if self._match_text_seq("BY", "TARGET") 5817 else self._match_text_seq("BY", "SOURCE") 5818 ) 5819 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5820 5821 self._match(TokenType.THEN) 5822 5823 if self._match(TokenType.INSERT): 5824 _this = self._parse_star() 5825 if _this: 5826 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5827 else: 5828 then = self.expression( 5829 exp.Insert, 5830 this=self._parse_value(), 5831 expression=self._match_text_seq("VALUES") and self._parse_value(), 5832 ) 5833 elif self._match(TokenType.UPDATE): 5834 expressions = self._parse_star() 5835 if expressions: 5836 then = self.expression(exp.Update, expressions=expressions) 5837 else: 5838 then = self.expression( 5839 exp.Update, 5840 expressions=self._match(TokenType.SET) 5841 and self._parse_csv(self._parse_equality), 5842 ) 5843 elif self._match(TokenType.DELETE): 5844 then = self.expression(exp.Var, this=self._prev.text) 5845 else: 5846 then = None 5847 5848 whens.append( 5849 self.expression( 5850 exp.When, 5851 matched=matched, 5852 source=source, 5853 condition=condition, 5854 then=then, 5855 ) 5856 ) 5857 return whens 5858 5859 def _parse_show(self) -> t.Optional[exp.Expression]: 5860 parser = 
self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5861 if parser: 5862 return parser(self) 5863 return self._parse_as_command(self._prev) 5864 5865 def _parse_set_item_assignment( 5866 self, kind: t.Optional[str] = None 5867 ) -> t.Optional[exp.Expression]: 5868 index = self._index 5869 5870 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5871 return self._parse_set_transaction(global_=kind == "GLOBAL") 5872 5873 left = self._parse_primary() or self._parse_id_var() 5874 assignment_delimiter = self._match_texts(("=", "TO")) 5875 5876 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5877 self._retreat(index) 5878 return None 5879 5880 right = self._parse_statement() or self._parse_id_var() 5881 this = self.expression(exp.EQ, this=left, expression=right) 5882 5883 return self.expression(exp.SetItem, this=this, kind=kind) 5884 5885 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5886 self._match_text_seq("TRANSACTION") 5887 characteristics = self._parse_csv( 5888 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5889 ) 5890 return self.expression( 5891 exp.SetItem, 5892 expressions=characteristics, 5893 kind="TRANSACTION", 5894 **{"global": global_}, # type: ignore 5895 ) 5896 5897 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5898 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5899 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5900 5901 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5902 index = self._index 5903 set_ = self.expression( 5904 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5905 ) 5906 5907 if self._curr: 5908 self._retreat(index) 5909 return self._parse_as_command(self._prev) 5910 5911 return set_ 5912 5913 def _parse_var_from_options( 5914 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 5915 ) -> t.Optional[exp.Var]: 5916 start = self._curr 5917 if not start: 5918 return None 5919 5920 option = start.text.upper() 5921 continuations = options.get(option) 5922 5923 index = self._index 5924 self._advance() 5925 for keywords in continuations or []: 5926 if isinstance(keywords, str): 5927 keywords = (keywords,) 5928 5929 if self._match_text_seq(*keywords): 5930 option = f"{option} {' '.join(keywords)}" 5931 break 5932 else: 5933 if continuations or continuations is None: 5934 if raise_unmatched: 5935 self.raise_error(f"Unknown option {option}") 5936 5937 self._retreat(index) 5938 return None 5939 5940 return exp.var(option) 5941 5942 def _parse_as_command(self, start: Token) -> exp.Command: 5943 while self._curr: 5944 self._advance() 5945 text = self._find_sql(start, self._prev) 5946 size = len(start.text) 5947 self._warn_unsupported() 5948 return exp.Command(this=text[:size], expression=text[size:]) 5949 5950 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5951 settings = [] 5952 5953 self._match_l_paren() 5954 kind = self._parse_id_var() 5955 5956 if self._match(TokenType.L_PAREN): 5957 while True: 5958 key = self._parse_id_var() 5959 value = self._parse_primary() 5960 5961 if not key and value is None: 5962 break 5963 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5964 self._match(TokenType.R_PAREN) 5965 5966 self._match_r_paren() 5967 5968 return self.expression( 5969 exp.DictProperty, 5970 this=this, 5971 kind=kind.this if kind else None, 5972 settings=settings, 5973 ) 5974 5975 def 
_parse_dict_range(self, this: str) -> exp.DictRange: 5976 self._match_l_paren() 5977 has_min = self._match_text_seq("MIN") 5978 if has_min: 5979 min = self._parse_var() or self._parse_primary() 5980 self._match_text_seq("MAX") 5981 max = self._parse_var() or self._parse_primary() 5982 else: 5983 max = self._parse_var() or self._parse_primary() 5984 min = exp.Literal.number(0) 5985 self._match_r_paren() 5986 return self.expression(exp.DictRange, this=this, min=min, max=max) 5987 5988 def _parse_comprehension( 5989 self, this: t.Optional[exp.Expression] 5990 ) -> t.Optional[exp.Comprehension]: 5991 index = self._index 5992 expression = self._parse_column() 5993 if not self._match(TokenType.IN): 5994 self._retreat(index - 1) 5995 return None 5996 iterator = self._parse_column() 5997 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5998 return self.expression( 5999 exp.Comprehension, 6000 this=this, 6001 expression=expression, 6002 iterator=iterator, 6003 condition=condition, 6004 ) 6005 6006 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6007 if self._match(TokenType.HEREDOC_STRING): 6008 return self.expression(exp.Heredoc, this=self._prev.text) 6009 6010 if not self._match_text_seq("$"): 6011 return None 6012 6013 tags = ["$"] 6014 tag_text = None 6015 6016 if self._is_connected(): 6017 self._advance() 6018 tags.append(self._prev.text.upper()) 6019 else: 6020 self.raise_error("No closing $ found") 6021 6022 if tags[-1] != "$": 6023 if self._is_connected() and self._match_text_seq("$"): 6024 tag_text = tags[-1] 6025 tags.append("$") 6026 else: 6027 self.raise_error("No closing $ found") 6028 6029 heredoc_start = self._curr 6030 6031 while self._curr: 6032 if self._match_text_seq(*tags, advance=False): 6033 this = self._find_sql(heredoc_start, self._prev) 6034 self._advance(len(tags)) 6035 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6036 6037 self._advance() 6038 6039 self.raise_error(f"No closing {''.join(tags)} found") 6040 return None 6041 6042 def _find_parser( 6043 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6044 ) -> t.Optional[t.Callable]: 6045 if not self._curr: 6046 return None 6047 6048 index = self._index 6049 this = [] 6050 while True: 6051 # The current token might be multiple words 6052 curr = self._curr.text.upper() 6053 key = curr.split(" ") 6054 this.append(curr) 6055 6056 self._advance() 6057 result, trie = in_trie(trie, key) 6058 if result == TrieResult.FAILED: 6059 break 6060 6061 if result == TrieResult.EXISTS: 6062 subparser = parsers[" ".join(this)] 6063 return subparser 6064 6065 self._retreat(index) 6066 return None 6067 6068 def _match(self, token_type, advance=True, expression=None): 6069 if not self._curr: 6070 return None 6071 6072 if self._curr.token_type == token_type: 6073 if advance: 6074 self._advance() 6075 self._add_comments(expression) 6076 return True 6077 6078 return None 6079 6080 def _match_set(self, types, advance=True): 6081 if not self._curr: 6082 return None 6083 6084 if self._curr.token_type in types: 6085 if advance: 6086 self._advance() 6087 return True 6088 6089 return None 6090 6091 def _match_pair(self, token_type_a, token_type_b, advance=True): 6092 if not self._curr or not self._next: 6093 return None 6094 6095 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6096 if advance: 6097 self._advance(2) 6098 return True 6099 6100 return None 6101 6102 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6103 if not 
self._match(TokenType.L_PAREN, expression=expression): 6104 self.raise_error("Expecting (") 6105 6106 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6107 if not self._match(TokenType.R_PAREN, expression=expression): 6108 self.raise_error("Expecting )") 6109 6110 def _match_texts(self, texts, advance=True): 6111 if self._curr and self._curr.text.upper() in texts: 6112 if advance: 6113 self._advance() 6114 return True 6115 return None 6116 6117 def _match_text_seq(self, *texts, advance=True): 6118 index = self._index 6119 for text in texts: 6120 if self._curr and self._curr.text.upper() == text: 6121 self._advance() 6122 else: 6123 self._retreat(index) 6124 return None 6125 6126 if not advance: 6127 self._retreat(index) 6128 6129 return True 6130 6131 def _replace_lambda( 6132 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6133 ) -> t.Optional[exp.Expression]: 6134 if not node: 6135 return node 6136 6137 for column in node.find_all(exp.Column): 6138 if column.parts[0].name in lambda_variables: 6139 dot_or_id = column.to_dot() if column.table else column.this 6140 parent = column.parent 6141 6142 while isinstance(parent, exp.Dot): 6143 if not isinstance(parent.parent, exp.Dot): 6144 parent.replace(dot_or_id) 6145 break 6146 parent = parent.parent 6147 else: 6148 if column is node: 6149 node = dot_or_id 6150 else: 6151 column.replace(dot_or_id) 6152 return node 6153 6154 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6155 start = self._prev 6156 6157 # Not to be confused with TRUNCATE(number, decimals) function call 6158 if self._match(TokenType.L_PAREN): 6159 self._retreat(self._index - 2) 6160 return self._parse_function() 6161 6162 # Clickhouse supports TRUNCATE DATABASE as well 6163 is_database = self._match(TokenType.DATABASE) 6164 6165 self._match(TokenType.TABLE) 6166 6167 exists = self._parse_exists(not_=False) 6168 6169 expressions = self._parse_csv( 6170 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6171 ) 6172 6173 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6174 6175 if self._match_text_seq("RESTART", "IDENTITY"): 6176 identity = "RESTART" 6177 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6178 identity = "CONTINUE" 6179 else: 6180 identity = None 6181 6182 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6183 option = self._prev.text 6184 else: 6185 option = None 6186 6187 partition = self._parse_partition() 6188 6189 # Fallback case 6190 if self._curr: 6191 return self._parse_as_command(start) 6192 6193 return self.expression( 6194 exp.TruncateTable, 6195 expressions=expressions, 6196 is_database=is_database, 6197 exists=exists, 6198 cluster=cluster, 6199 identity=identity, 6200 option=option, 6201 partition=partition, 6202 ) 6203 6204 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6205 this = self._parse_ordered(self._parse_opclass) 6206 6207 if not self._match(TokenType.WITH): 6208 return this 6209 6210 op = self._parse_var(any_token=True) 6211 6212 return self.expression(exp.WithOperator, this=this, op=op)
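The _parse_decode docstring above states that the search/result variant of DECODE is always lowered to a CASE expression at parse time, with NULL searches compared via IS NULL rather than equality. A minimal sketch of that behavior through the public API (the exact generated SQL may vary by sqlglot version):

    import sqlglot
    from sqlglot import exp

    ast = sqlglot.parse_one(
        "SELECT DECODE(status, 1, 'active', NULL, 'missing', 'other') FROM users",
        read="oracle",
    )

    # _parse_decode leaves no DECODE node behind; the call becomes an exp.Case,
    # and the NULL search is compiled to `status IS NULL`.
    print(ast.find(exp.Case).sql())
    # e.g. CASE WHEN status = 1 THEN 'active' WHEN status IS NULL THEN 'missing' ELSE 'other' END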
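Relatedly, _parse_string_agg deliberately folds a trailing WITHIN GROUP (ORDER BY ...) into the exp.GroupConcat node instead of an exp.WithinGroup wrapper, which is what makes transpiling the call to the MySQL / SQLite GROUP_CONCAT form straightforward. A hedged sketch (the output shape may differ slightly across versions):

    import sqlglot

    sql = "SELECT STRING_AGG(x, ',') WITHIN GROUP (ORDER BY x) FROM t"
    print(sqlglot.transpile(sql, read="postgres", write="mysql")[0])
    # e.g. SELECT GROUP_CONCAT(x ORDER BY x SEPARATOR ',') FROM t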
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
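As an illustration of these settings, a Parser can be driven directly with tokens from the Tokenizer; a minimal sketch assuming the default dialect (in everyday use, sqlglot.parse_one wraps both steps):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

# RAISE defers errors until parsing finishes, then raises at most max_errors of them.
parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5)
(tree,) = parser.parse(Tokenizer().tokenize("SELECT 1"), sql="SELECT 1")
print(tree.sql())  # SELECT 1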
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    dialect: DialectType = None,
):
    from sqlglot.dialects import Dialect

    self.error_level = error_level or ErrorLevel.IMMEDIATE
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    self.dialect = Dialect.get_or_raise(dialect)
    self.reset()
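The dialect argument is resolved through Dialect.get_or_raise, which accepts a dialect name, class, instance, or None; a short sketch (illustrative only, since the exact return type has varied across sqlglot versions):

from sqlglot.dialects import Dialect

default = Dialect.get_or_raise(None)     # None yields the default dialect
duckdb = Dialect.get_or_raise("duckdb")  # known names resolve to that dialect
# Unknown names raise, which is how a bad `dialect=` argument surfaces early.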
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens and returns a list of syntax trees, one tree
    per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    return self._parse(
        parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
    )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
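Because one tree is produced per statement, a multi-statement input yields a list of the same length; a quick sketch via the module-level wrapper, assuming sqlglot is installed:

import sqlglot

trees = sqlglot.parse("SELECT 1; SELECT 2")  # delegates to Parser.parse
print(len(trees))  # 2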
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
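A short sketch of parse_into through the module-level API, which routes into= down to this method; exp.Select is a registered key in EXPRESSION_PARSERS, so this should succeed (hedged, since the registered types can change between versions):

import sqlglot
from sqlglot import exp

select = sqlglot.parse_one("SELECT a FROM t", into=exp.Select)
print(isinstance(select, exp.Select))  # True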
def check_errors(self) -> None:
    """Logs or raises any found errors, depending on the chosen error level setting."""
    if self.error_level == ErrorLevel.WARN:
        for error in self.errors:
            logger.error(str(error))
    elif self.error_level == ErrorLevel.RAISE and self.errors:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )
Logs or raises any found errors, depending on the chosen error level setting.
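Since check_errors runs at the end of parsing, the WARN level turns recorded errors into log output while still returning whatever was parsed; a sketch (the typo below likely leaves unparsed tokens, though the exact error text is version-dependent):

import logging
from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

logging.basicConfig()

sql = "SELECT * FORM t"  # "FORM" is a deliberate typo
parser = Parser(error_level=ErrorLevel.WARN)
trees = parser.parse(Tokenizer().tokenize(sql), sql=sql)  # logs instead of raising
print(len(parser.errors))  # the recorded ParseErrors remain inspectable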
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error in the list of recorded errors or raises it, depending on the chosen
    error level setting.
    """
    token = token or self._curr or self._prev or Token.string("")
    start = token.start
    end = token.end + 1
    start_context = self.sql[max(start - self.error_message_context, 0) : start]
    highlight = self.sql[start:end]
    end_context = self.sql[end : end + self.error_message_context]

    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
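With the default IMMEDIATE level, the first raise_error call aborts parsing, and the raised ParseError carries the structured fields built above. A sketch, reusing the hypothetical typo from before:

from sqlglot.errors import ErrorLevel, ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT * FORM t"
parser = Parser(error_level=ErrorLevel.IMMEDIATE)
try:
    parser.parse(Tokenizer().tokenize(sql), sql=sql)
except ParseError as err:
    # Each recorded error exposes line, col, highlight and surrounding context.
    print(err.errors[0]["line"], err.errors[0]["col"])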
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)
    instance.add_comments(comments) if comments else self._add_comments(instance)
    return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
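For illustration, expression can also be called directly to build a validated node; a sketch assuming the default error level (omitting a mandatory arg would be routed through validate_expression and raise_error):

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()
col = parser.expression(exp.Column, this=exp.to_identifier("x"))
print(col.sql())  # x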
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression, making sure that all its mandatory arguments are set.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The validated expression.
    """
    if self.error_level != ErrorLevel.IGNORE:
        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
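Conversely, with ErrorLevel.IGNORE the mandatory-argument check is skipped entirely and the node is returned as-is; a sketch (exp.Not is used here only as an example of a type with a mandatory `this` arg):

from sqlglot import exp
from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

lenient = Parser(error_level=ErrorLevel.IGNORE)
node = lenient.validate_expression(exp.Not())  # incomplete, but no error is raised
print(node.args)  # {}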