sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E, Lit 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18logger = logging.getLogger("sqlglot") 19 20OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 21 22 23def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 24 if len(args) == 1 and args[0].is_star: 25 return exp.StarMap(this=args[0]) 26 27 keys = [] 28 values = [] 29 for i in range(0, len(args), 2): 30 keys.append(args[i]) 31 values.append(args[i + 1]) 32 33 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 34 35 36def build_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 50 # Default argument order is base, expression 51 this = seq_get(args, 0) 52 expression = seq_get(args, 1) 53 54 if expression: 55 if not dialect.LOG_BASE_FIRST: 56 this, expression = expression, this 57 return exp.Log(this=this, expression=expression) 58 59 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 60 61 62def 
build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 63 def _builder(args: t.List, dialect: Dialect) -> E: 64 expression = expr_type( 65 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 66 ) 67 if len(args) > 2 and expr_type is exp.JSONExtract: 68 expression.set("expressions", args[2:]) 69 70 return expression 71 72 return _builder 73 74 75class _Parser(type): 76 def __new__(cls, clsname, bases, attrs): 77 klass = super().__new__(cls, clsname, bases, attrs) 78 79 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 80 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 81 82 return klass 83 84 85class Parser(metaclass=_Parser): 86 """ 87 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 88 89 Args: 90 error_level: The desired error level. 91 Default: ErrorLevel.IMMEDIATE 92 error_message_context: The amount of context to capture from a query string when displaying 93 the error message (in number of characters). 94 Default: 100 95 max_errors: Maximum number of error messages to include in a raised ParseError. 96 This is only relevant if error_level is ErrorLevel.RAISE. 
97 Default: 3 98 """ 99 100 FUNCTIONS: t.Dict[str, t.Callable] = { 101 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 102 "CONCAT": lambda args, dialect: exp.Concat( 103 expressions=args, 104 safe=not dialect.STRICT_STRING_CONCAT, 105 coalesce=dialect.CONCAT_COALESCE, 106 ), 107 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 108 expressions=args, 109 safe=not dialect.STRICT_STRING_CONCAT, 110 coalesce=dialect.CONCAT_COALESCE, 111 ), 112 "DATE_TO_DATE_STR": lambda args: exp.Cast( 113 this=seq_get(args, 0), 114 to=exp.DataType(this=exp.DataType.Type.TEXT), 115 ), 116 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 117 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 118 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 119 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 120 "LIKE": build_like, 121 "LOG": build_logarithm, 122 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 123 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 124 "MOD": lambda args: exp.Mod(this=seq_get(args, 0), expression=seq_get(args, 1)), 125 "TIME_TO_TIME_STR": lambda args: exp.Cast( 126 this=seq_get(args, 0), 127 to=exp.DataType(this=exp.DataType.Type.TEXT), 128 ), 129 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 130 this=exp.Cast( 131 this=seq_get(args, 0), 132 to=exp.DataType(this=exp.DataType.Type.TEXT), 133 ), 134 start=exp.Literal.number(1), 135 length=exp.Literal.number(10), 136 ), 137 "VAR_MAP": build_var_map, 138 } 139 140 NO_PAREN_FUNCTIONS = { 141 TokenType.CURRENT_DATE: exp.CurrentDate, 142 TokenType.CURRENT_DATETIME: exp.CurrentDate, 143 TokenType.CURRENT_TIME: exp.CurrentTime, 144 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 145 TokenType.CURRENT_USER: exp.CurrentUser, 146 } 147 148 STRUCT_TYPE_TOKENS = { 149 TokenType.NESTED, 150 TokenType.OBJECT, 151 
TokenType.STRUCT, 152 } 153 154 NESTED_TYPE_TOKENS = { 155 TokenType.ARRAY, 156 TokenType.LOWCARDINALITY, 157 TokenType.MAP, 158 TokenType.NULLABLE, 159 *STRUCT_TYPE_TOKENS, 160 } 161 162 ENUM_TYPE_TOKENS = { 163 TokenType.ENUM, 164 TokenType.ENUM8, 165 TokenType.ENUM16, 166 } 167 168 AGGREGATE_TYPE_TOKENS = { 169 TokenType.AGGREGATEFUNCTION, 170 TokenType.SIMPLEAGGREGATEFUNCTION, 171 } 172 173 TYPE_TOKENS = { 174 TokenType.BIT, 175 TokenType.BOOLEAN, 176 TokenType.TINYINT, 177 TokenType.UTINYINT, 178 TokenType.SMALLINT, 179 TokenType.USMALLINT, 180 TokenType.INT, 181 TokenType.UINT, 182 TokenType.BIGINT, 183 TokenType.UBIGINT, 184 TokenType.INT128, 185 TokenType.UINT128, 186 TokenType.INT256, 187 TokenType.UINT256, 188 TokenType.MEDIUMINT, 189 TokenType.UMEDIUMINT, 190 TokenType.FIXEDSTRING, 191 TokenType.FLOAT, 192 TokenType.DOUBLE, 193 TokenType.CHAR, 194 TokenType.NCHAR, 195 TokenType.VARCHAR, 196 TokenType.NVARCHAR, 197 TokenType.BPCHAR, 198 TokenType.TEXT, 199 TokenType.MEDIUMTEXT, 200 TokenType.LONGTEXT, 201 TokenType.MEDIUMBLOB, 202 TokenType.LONGBLOB, 203 TokenType.BINARY, 204 TokenType.VARBINARY, 205 TokenType.JSON, 206 TokenType.JSONB, 207 TokenType.INTERVAL, 208 TokenType.TINYBLOB, 209 TokenType.TINYTEXT, 210 TokenType.TIME, 211 TokenType.TIMETZ, 212 TokenType.TIMESTAMP, 213 TokenType.TIMESTAMP_S, 214 TokenType.TIMESTAMP_MS, 215 TokenType.TIMESTAMP_NS, 216 TokenType.TIMESTAMPTZ, 217 TokenType.TIMESTAMPLTZ, 218 TokenType.DATETIME, 219 TokenType.DATETIME64, 220 TokenType.DATE, 221 TokenType.DATE32, 222 TokenType.INT4RANGE, 223 TokenType.INT4MULTIRANGE, 224 TokenType.INT8RANGE, 225 TokenType.INT8MULTIRANGE, 226 TokenType.NUMRANGE, 227 TokenType.NUMMULTIRANGE, 228 TokenType.TSRANGE, 229 TokenType.TSMULTIRANGE, 230 TokenType.TSTZRANGE, 231 TokenType.TSTZMULTIRANGE, 232 TokenType.DATERANGE, 233 TokenType.DATEMULTIRANGE, 234 TokenType.DECIMAL, 235 TokenType.UDECIMAL, 236 TokenType.BIGDECIMAL, 237 TokenType.UUID, 238 TokenType.GEOGRAPHY, 239 TokenType.GEOMETRY, 
240 TokenType.HLLSKETCH, 241 TokenType.HSTORE, 242 TokenType.PSEUDO_TYPE, 243 TokenType.SUPER, 244 TokenType.SERIAL, 245 TokenType.SMALLSERIAL, 246 TokenType.BIGSERIAL, 247 TokenType.XML, 248 TokenType.YEAR, 249 TokenType.UNIQUEIDENTIFIER, 250 TokenType.USERDEFINED, 251 TokenType.MONEY, 252 TokenType.SMALLMONEY, 253 TokenType.ROWVERSION, 254 TokenType.IMAGE, 255 TokenType.VARIANT, 256 TokenType.OBJECT, 257 TokenType.OBJECT_IDENTIFIER, 258 TokenType.INET, 259 TokenType.IPADDRESS, 260 TokenType.IPPREFIX, 261 TokenType.IPV4, 262 TokenType.IPV6, 263 TokenType.UNKNOWN, 264 TokenType.NULL, 265 TokenType.NAME, 266 *ENUM_TYPE_TOKENS, 267 *NESTED_TYPE_TOKENS, 268 *AGGREGATE_TYPE_TOKENS, 269 } 270 271 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 272 TokenType.BIGINT: TokenType.UBIGINT, 273 TokenType.INT: TokenType.UINT, 274 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 275 TokenType.SMALLINT: TokenType.USMALLINT, 276 TokenType.TINYINT: TokenType.UTINYINT, 277 TokenType.DECIMAL: TokenType.UDECIMAL, 278 } 279 280 SUBQUERY_PREDICATES = { 281 TokenType.ANY: exp.Any, 282 TokenType.ALL: exp.All, 283 TokenType.EXISTS: exp.Exists, 284 TokenType.SOME: exp.Any, 285 } 286 287 RESERVED_TOKENS = { 288 *Tokenizer.SINGLE_TOKENS.values(), 289 TokenType.SELECT, 290 } 291 292 DB_CREATABLES = { 293 TokenType.DATABASE, 294 TokenType.SCHEMA, 295 TokenType.TABLE, 296 TokenType.VIEW, 297 TokenType.MODEL, 298 TokenType.DICTIONARY, 299 TokenType.SEQUENCE, 300 TokenType.STORAGE_INTEGRATION, 301 } 302 303 CREATABLES = { 304 TokenType.COLUMN, 305 TokenType.CONSTRAINT, 306 TokenType.FUNCTION, 307 TokenType.INDEX, 308 TokenType.PROCEDURE, 309 TokenType.FOREIGN_KEY, 310 *DB_CREATABLES, 311 } 312 313 # Tokens that can represent identifiers 314 ID_VAR_TOKENS = { 315 TokenType.VAR, 316 TokenType.ANTI, 317 TokenType.APPLY, 318 TokenType.ASC, 319 TokenType.ASOF, 320 TokenType.AUTO_INCREMENT, 321 TokenType.BEGIN, 322 TokenType.BPCHAR, 323 TokenType.CACHE, 324 TokenType.CASE, 325 TokenType.COLLATE, 326 TokenType.COMMAND, 327 
TokenType.COMMENT, 328 TokenType.COMMIT, 329 TokenType.CONSTRAINT, 330 TokenType.DEFAULT, 331 TokenType.DELETE, 332 TokenType.DESC, 333 TokenType.DESCRIBE, 334 TokenType.DICTIONARY, 335 TokenType.DIV, 336 TokenType.END, 337 TokenType.EXECUTE, 338 TokenType.ESCAPE, 339 TokenType.FALSE, 340 TokenType.FIRST, 341 TokenType.FILTER, 342 TokenType.FINAL, 343 TokenType.FORMAT, 344 TokenType.FULL, 345 TokenType.IS, 346 TokenType.ISNULL, 347 TokenType.INTERVAL, 348 TokenType.KEEP, 349 TokenType.KILL, 350 TokenType.LEFT, 351 TokenType.LOAD, 352 TokenType.MERGE, 353 TokenType.NATURAL, 354 TokenType.NEXT, 355 TokenType.OFFSET, 356 TokenType.OPERATOR, 357 TokenType.ORDINALITY, 358 TokenType.OVERLAPS, 359 TokenType.OVERWRITE, 360 TokenType.PARTITION, 361 TokenType.PERCENT, 362 TokenType.PIVOT, 363 TokenType.PRAGMA, 364 TokenType.RANGE, 365 TokenType.RECURSIVE, 366 TokenType.REFERENCES, 367 TokenType.REFRESH, 368 TokenType.REPLACE, 369 TokenType.RIGHT, 370 TokenType.ROW, 371 TokenType.ROWS, 372 TokenType.SEMI, 373 TokenType.SET, 374 TokenType.SETTINGS, 375 TokenType.SHOW, 376 TokenType.TEMPORARY, 377 TokenType.TOP, 378 TokenType.TRUE, 379 TokenType.TRUNCATE, 380 TokenType.UNIQUE, 381 TokenType.UNPIVOT, 382 TokenType.UPDATE, 383 TokenType.USE, 384 TokenType.VOLATILE, 385 TokenType.WINDOW, 386 *CREATABLES, 387 *SUBQUERY_PREDICATES, 388 *TYPE_TOKENS, 389 *NO_PAREN_FUNCTIONS, 390 } 391 392 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 393 394 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 395 TokenType.ANTI, 396 TokenType.APPLY, 397 TokenType.ASOF, 398 TokenType.FULL, 399 TokenType.LEFT, 400 TokenType.LOCK, 401 TokenType.NATURAL, 402 TokenType.OFFSET, 403 TokenType.RIGHT, 404 TokenType.SEMI, 405 TokenType.WINDOW, 406 } 407 408 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 409 410 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 411 412 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 413 414 FUNC_TOKENS = { 415 TokenType.COLLATE, 416 TokenType.COMMAND, 417 
TokenType.CURRENT_DATE, 418 TokenType.CURRENT_DATETIME, 419 TokenType.CURRENT_TIMESTAMP, 420 TokenType.CURRENT_TIME, 421 TokenType.CURRENT_USER, 422 TokenType.FILTER, 423 TokenType.FIRST, 424 TokenType.FORMAT, 425 TokenType.GLOB, 426 TokenType.IDENTIFIER, 427 TokenType.INDEX, 428 TokenType.ISNULL, 429 TokenType.ILIKE, 430 TokenType.INSERT, 431 TokenType.LIKE, 432 TokenType.MERGE, 433 TokenType.OFFSET, 434 TokenType.PRIMARY_KEY, 435 TokenType.RANGE, 436 TokenType.REPLACE, 437 TokenType.RLIKE, 438 TokenType.ROW, 439 TokenType.UNNEST, 440 TokenType.VAR, 441 TokenType.LEFT, 442 TokenType.RIGHT, 443 TokenType.SEQUENCE, 444 TokenType.DATE, 445 TokenType.DATETIME, 446 TokenType.TABLE, 447 TokenType.TIMESTAMP, 448 TokenType.TIMESTAMPTZ, 449 TokenType.TRUNCATE, 450 TokenType.WINDOW, 451 TokenType.XOR, 452 *TYPE_TOKENS, 453 *SUBQUERY_PREDICATES, 454 } 455 456 CONJUNCTION = { 457 TokenType.AND: exp.And, 458 TokenType.OR: exp.Or, 459 } 460 461 EQUALITY = { 462 TokenType.COLON_EQ: exp.PropertyEQ, 463 TokenType.EQ: exp.EQ, 464 TokenType.NEQ: exp.NEQ, 465 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 466 } 467 468 COMPARISON = { 469 TokenType.GT: exp.GT, 470 TokenType.GTE: exp.GTE, 471 TokenType.LT: exp.LT, 472 TokenType.LTE: exp.LTE, 473 } 474 475 BITWISE = { 476 TokenType.AMP: exp.BitwiseAnd, 477 TokenType.CARET: exp.BitwiseXor, 478 TokenType.PIPE: exp.BitwiseOr, 479 } 480 481 TERM = { 482 TokenType.DASH: exp.Sub, 483 TokenType.PLUS: exp.Add, 484 TokenType.MOD: exp.Mod, 485 TokenType.COLLATE: exp.Collate, 486 } 487 488 FACTOR = { 489 TokenType.DIV: exp.IntDiv, 490 TokenType.LR_ARROW: exp.Distance, 491 TokenType.SLASH: exp.Div, 492 TokenType.STAR: exp.Mul, 493 } 494 495 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 496 497 TIMES = { 498 TokenType.TIME, 499 TokenType.TIMETZ, 500 } 501 502 TIMESTAMPS = { 503 TokenType.TIMESTAMP, 504 TokenType.TIMESTAMPTZ, 505 TokenType.TIMESTAMPLTZ, 506 *TIMES, 507 } 508 509 SET_OPERATIONS = { 510 TokenType.UNION, 511 TokenType.INTERSECT, 512 
TokenType.EXCEPT, 513 } 514 515 JOIN_METHODS = { 516 TokenType.ASOF, 517 TokenType.NATURAL, 518 TokenType.POSITIONAL, 519 } 520 521 JOIN_SIDES = { 522 TokenType.LEFT, 523 TokenType.RIGHT, 524 TokenType.FULL, 525 } 526 527 JOIN_KINDS = { 528 TokenType.INNER, 529 TokenType.OUTER, 530 TokenType.CROSS, 531 TokenType.SEMI, 532 TokenType.ANTI, 533 } 534 535 JOIN_HINTS: t.Set[str] = set() 536 537 LAMBDAS = { 538 TokenType.ARROW: lambda self, expressions: self.expression( 539 exp.Lambda, 540 this=self._replace_lambda( 541 self._parse_conjunction(), 542 {node.name for node in expressions}, 543 ), 544 expressions=expressions, 545 ), 546 TokenType.FARROW: lambda self, expressions: self.expression( 547 exp.Kwarg, 548 this=exp.var(expressions[0].name), 549 expression=self._parse_conjunction(), 550 ), 551 } 552 553 COLUMN_OPERATORS = { 554 TokenType.DOT: None, 555 TokenType.DCOLON: lambda self, this, to: self.expression( 556 exp.Cast if self.STRICT_CAST else exp.TryCast, 557 this=this, 558 to=to, 559 ), 560 TokenType.ARROW: lambda self, this, path: self.expression( 561 exp.JSONExtract, 562 this=this, 563 expression=self.dialect.to_json_path(path), 564 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 565 ), 566 TokenType.DARROW: lambda self, this, path: self.expression( 567 exp.JSONExtractScalar, 568 this=this, 569 expression=self.dialect.to_json_path(path), 570 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 571 ), 572 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 573 exp.JSONBExtract, 574 this=this, 575 expression=path, 576 ), 577 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 578 exp.JSONBExtractScalar, 579 this=this, 580 expression=path, 581 ), 582 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 583 exp.JSONBContains, 584 this=this, 585 expression=key, 586 ), 587 } 588 589 EXPRESSION_PARSERS = { 590 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 591 exp.Column: lambda self: 
self._parse_column(), 592 exp.Condition: lambda self: self._parse_conjunction(), 593 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 594 exp.Expression: lambda self: self._parse_expression(), 595 exp.From: lambda self: self._parse_from(), 596 exp.Group: lambda self: self._parse_group(), 597 exp.Having: lambda self: self._parse_having(), 598 exp.Identifier: lambda self: self._parse_id_var(), 599 exp.Join: lambda self: self._parse_join(), 600 exp.Lambda: lambda self: self._parse_lambda(), 601 exp.Lateral: lambda self: self._parse_lateral(), 602 exp.Limit: lambda self: self._parse_limit(), 603 exp.Offset: lambda self: self._parse_offset(), 604 exp.Order: lambda self: self._parse_order(), 605 exp.Ordered: lambda self: self._parse_ordered(), 606 exp.Properties: lambda self: self._parse_properties(), 607 exp.Qualify: lambda self: self._parse_qualify(), 608 exp.Returning: lambda self: self._parse_returning(), 609 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 610 exp.Table: lambda self: self._parse_table_parts(), 611 exp.TableAlias: lambda self: self._parse_table_alias(), 612 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 613 exp.Where: lambda self: self._parse_where(), 614 exp.Window: lambda self: self._parse_named_window(), 615 exp.With: lambda self: self._parse_with(), 616 "JOIN_TYPE": lambda self: self._parse_join_parts(), 617 } 618 619 STATEMENT_PARSERS = { 620 TokenType.ALTER: lambda self: self._parse_alter(), 621 TokenType.BEGIN: lambda self: self._parse_transaction(), 622 TokenType.CACHE: lambda self: self._parse_cache(), 623 TokenType.COMMENT: lambda self: self._parse_comment(), 624 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 625 TokenType.CREATE: lambda self: self._parse_create(), 626 TokenType.DELETE: lambda self: self._parse_delete(), 627 TokenType.DESC: lambda self: self._parse_describe(), 628 TokenType.DESCRIBE: lambda self: self._parse_describe(), 629 TokenType.DROP: lambda 
self: self._parse_drop(), 630 TokenType.INSERT: lambda self: self._parse_insert(), 631 TokenType.KILL: lambda self: self._parse_kill(), 632 TokenType.LOAD: lambda self: self._parse_load(), 633 TokenType.MERGE: lambda self: self._parse_merge(), 634 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 635 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 636 TokenType.REFRESH: lambda self: self._parse_refresh(), 637 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 638 TokenType.SET: lambda self: self._parse_set(), 639 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 640 TokenType.UNCACHE: lambda self: self._parse_uncache(), 641 TokenType.UPDATE: lambda self: self._parse_update(), 642 TokenType.USE: lambda self: self.expression( 643 exp.Use, 644 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 645 this=self._parse_table(schema=False), 646 ), 647 } 648 649 UNARY_PARSERS = { 650 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 651 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 652 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 653 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 654 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 655 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 656 } 657 658 STRING_PARSERS = { 659 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 660 exp.RawString, this=token.text 661 ), 662 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 663 exp.National, this=token.text 664 ), 665 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 666 TokenType.STRING: lambda self, token: self.expression( 667 exp.Literal, this=token.text, is_string=True 668 ), 669 
TokenType.UNICODE_STRING: lambda self, token: self.expression( 670 exp.UnicodeString, 671 this=token.text, 672 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 673 ), 674 } 675 676 NUMERIC_PARSERS = { 677 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 678 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 679 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 680 TokenType.NUMBER: lambda self, token: self.expression( 681 exp.Literal, this=token.text, is_string=False 682 ), 683 } 684 685 PRIMARY_PARSERS = { 686 **STRING_PARSERS, 687 **NUMERIC_PARSERS, 688 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 689 TokenType.NULL: lambda self, _: self.expression(exp.Null), 690 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 691 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 692 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 693 TokenType.STAR: lambda self, _: self.expression( 694 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 695 ), 696 } 697 698 PLACEHOLDER_PARSERS = { 699 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 700 TokenType.PARAMETER: lambda self: self._parse_parameter(), 701 TokenType.COLON: lambda self: ( 702 self.expression(exp.Placeholder, this=self._prev.text) 703 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 704 else None 705 ), 706 } 707 708 RANGE_PARSERS = { 709 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 710 TokenType.GLOB: binary_range_parser(exp.Glob), 711 TokenType.ILIKE: binary_range_parser(exp.ILike), 712 TokenType.IN: lambda self, this: self._parse_in(this), 713 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 714 TokenType.IS: lambda self, this: self._parse_is(this), 715 TokenType.LIKE: 
binary_range_parser(exp.Like), 716 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 717 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 718 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 719 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 720 } 721 722 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 723 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 724 "AUTO": lambda self: self._parse_auto_property(), 725 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 726 "BACKUP": lambda self: self.expression( 727 exp.BackupProperty, this=self._parse_var(any_token=True) 728 ), 729 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 730 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 731 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 732 "CHECKSUM": lambda self: self._parse_checksum(), 733 "CLUSTER BY": lambda self: self._parse_cluster(), 734 "CLUSTERED": lambda self: self._parse_clustered_by(), 735 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 736 exp.CollateProperty, **kwargs 737 ), 738 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 739 "CONTAINS": lambda self: self._parse_contains_property(), 740 "COPY": lambda self: self._parse_copy_property(), 741 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 742 "DEFINER": lambda self: self._parse_definer(), 743 "DETERMINISTIC": lambda self: self.expression( 744 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 745 ), 746 "DISTKEY": lambda self: self._parse_distkey(), 747 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 748 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 749 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 750 "EXTERNAL": lambda self: 
self.expression(exp.ExternalProperty), 751 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 752 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 753 "FREESPACE": lambda self: self._parse_freespace(), 754 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 755 "HEAP": lambda self: self.expression(exp.HeapProperty), 756 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 757 "IMMUTABLE": lambda self: self.expression( 758 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 759 ), 760 "INHERITS": lambda self: self.expression( 761 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 762 ), 763 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 764 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 765 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 766 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 767 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 768 "LIKE": lambda self: self._parse_create_like(), 769 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 770 "LOCK": lambda self: self._parse_locking(), 771 "LOCKING": lambda self: self._parse_locking(), 772 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 773 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 774 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 775 "MODIFIES": lambda self: self._parse_modifies_property(), 776 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 777 "NO": lambda self: self._parse_no_property(), 778 "ON": lambda self: self._parse_on_property(), 779 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 780 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 781 "PARTITION": lambda self: self._parse_partitioned_of(), 
782 "PARTITION BY": lambda self: self._parse_partitioned_by(), 783 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 784 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 785 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 786 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 787 "READS": lambda self: self._parse_reads_property(), 788 "REMOTE": lambda self: self._parse_remote_with_connection(), 789 "RETURNS": lambda self: self._parse_returns(), 790 "ROW": lambda self: self._parse_row(), 791 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 792 "SAMPLE": lambda self: self.expression( 793 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 794 ), 795 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 796 "SETTINGS": lambda self: self.expression( 797 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 798 ), 799 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 800 "SORTKEY": lambda self: self._parse_sortkey(), 801 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 802 "STABLE": lambda self: self.expression( 803 exp.StabilityProperty, this=exp.Literal.string("STABLE") 804 ), 805 "STORED": lambda self: self._parse_stored(), 806 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 807 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 808 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 809 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 810 "TO": lambda self: self._parse_to_table(), 811 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 812 "TRANSFORM": lambda self: self.expression( 813 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 814 ), 815 "TTL": lambda self: self._parse_ttl(), 816 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 
817 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 818 "VOLATILE": lambda self: self._parse_volatile_property(), 819 "WITH": lambda self: self._parse_with_property(), 820 } 821 822 CONSTRAINT_PARSERS = { 823 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 824 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 825 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 826 "CHARACTER SET": lambda self: self.expression( 827 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 828 ), 829 "CHECK": lambda self: self.expression( 830 exp.CheckColumnConstraint, 831 this=self._parse_wrapped(self._parse_conjunction), 832 enforced=self._match_text_seq("ENFORCED"), 833 ), 834 "COLLATE": lambda self: self.expression( 835 exp.CollateColumnConstraint, this=self._parse_var() 836 ), 837 "COMMENT": lambda self: self.expression( 838 exp.CommentColumnConstraint, this=self._parse_string() 839 ), 840 "COMPRESS": lambda self: self._parse_compress(), 841 "CLUSTERED": lambda self: self.expression( 842 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 843 ), 844 "NONCLUSTERED": lambda self: self.expression( 845 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 846 ), 847 "DEFAULT": lambda self: self.expression( 848 exp.DefaultColumnConstraint, this=self._parse_bitwise() 849 ), 850 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 851 "EXCLUDE": lambda self: self.expression( 852 exp.ExcludeColumnConstraint, this=self._parse_index_params() 853 ), 854 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 855 "FORMAT": lambda self: self.expression( 856 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 857 ), 858 "GENERATED": lambda self: self._parse_generated_as_identity(), 859 "IDENTITY": lambda self: self._parse_auto_increment(), 860 "INLINE": lambda self: self._parse_inline(), 861 
"LIKE": lambda self: self._parse_create_like(), 862 "NOT": lambda self: self._parse_not_constraint(), 863 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 864 "ON": lambda self: ( 865 self._match(TokenType.UPDATE) 866 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 867 ) 868 or self.expression(exp.OnProperty, this=self._parse_id_var()), 869 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 870 "PERIOD": lambda self: self._parse_period_for_system_time(), 871 "PRIMARY KEY": lambda self: self._parse_primary_key(), 872 "REFERENCES": lambda self: self._parse_references(match=False), 873 "TITLE": lambda self: self.expression( 874 exp.TitleColumnConstraint, this=self._parse_var_or_string() 875 ), 876 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 877 "UNIQUE": lambda self: self._parse_unique(), 878 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 879 "WITH": lambda self: self.expression( 880 exp.Properties, expressions=self._parse_wrapped_properties() 881 ), 882 } 883 884 ALTER_PARSERS = { 885 "ADD": lambda self: self._parse_alter_table_add(), 886 "ALTER": lambda self: self._parse_alter_table_alter(), 887 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 888 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 889 "DROP": lambda self: self._parse_alter_table_drop(), 890 "RENAME": lambda self: self._parse_alter_table_rename(), 891 } 892 893 SCHEMA_UNNAMED_CONSTRAINTS = { 894 "CHECK", 895 "EXCLUDE", 896 "FOREIGN KEY", 897 "LIKE", 898 "PERIOD", 899 "PRIMARY KEY", 900 "UNIQUE", 901 } 902 903 NO_PAREN_FUNCTION_PARSERS = { 904 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 905 "CASE": lambda self: self._parse_case(), 906 "IF": lambda self: self._parse_if(), 907 "NEXT": lambda self: self._parse_next_value_for(), 908 } 909 910 
INVALID_FUNC_NAME_TOKENS = { 911 TokenType.IDENTIFIER, 912 TokenType.STRING, 913 } 914 915 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 916 917 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 918 919 FUNCTION_PARSERS = { 920 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 921 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 922 "DECODE": lambda self: self._parse_decode(), 923 "EXTRACT": lambda self: self._parse_extract(), 924 "JSON_OBJECT": lambda self: self._parse_json_object(), 925 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 926 "JSON_TABLE": lambda self: self._parse_json_table(), 927 "MATCH": lambda self: self._parse_match_against(), 928 "OPENJSON": lambda self: self._parse_open_json(), 929 "POSITION": lambda self: self._parse_position(), 930 "PREDICT": lambda self: self._parse_predict(), 931 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 932 "STRING_AGG": lambda self: self._parse_string_agg(), 933 "SUBSTRING": lambda self: self._parse_substring(), 934 "TRIM": lambda self: self._parse_trim(), 935 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 936 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 937 } 938 939 QUERY_MODIFIER_PARSERS = { 940 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 941 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 942 TokenType.WHERE: lambda self: ("where", self._parse_where()), 943 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 944 TokenType.HAVING: lambda self: ("having", self._parse_having()), 945 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 946 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 947 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 948 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 949 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 950 
TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 951 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 952 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 953 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 954 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 955 TokenType.CLUSTER_BY: lambda self: ( 956 "cluster", 957 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 958 ), 959 TokenType.DISTRIBUTE_BY: lambda self: ( 960 "distribute", 961 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 962 ), 963 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 964 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 965 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 966 } 967 968 SET_PARSERS = { 969 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 970 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 971 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 972 "TRANSACTION": lambda self: self._parse_set_transaction(), 973 } 974 975 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 976 977 TYPE_LITERAL_PARSERS = { 978 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 979 } 980 981 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 982 983 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 984 985 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 986 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 987 "ISOLATION": ( 988 ("LEVEL", "REPEATABLE", "READ"), 989 ("LEVEL", "READ", "COMMITTED"), 990 ("LEVEL", "READ", "UNCOMITTED"), 991 ("LEVEL", "SERIALIZABLE"), 992 ), 993 "READ": ("WRITE", "ONLY"), 994 } 995 996 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 997 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), 
tuple() 998 ) 999 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1000 1001 CREATE_SEQUENCE: OPTIONS_TYPE = { 1002 "SCALE": ("EXTEND", "NOEXTEND"), 1003 "SHARD": ("EXTEND", "NOEXTEND"), 1004 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1005 **dict.fromkeys( 1006 ( 1007 "SESSION", 1008 "GLOBAL", 1009 "KEEP", 1010 "NOKEEP", 1011 "ORDER", 1012 "NOORDER", 1013 "NOCACHE", 1014 "CYCLE", 1015 "NOCYCLE", 1016 "NOMINVALUE", 1017 "NOMAXVALUE", 1018 "NOSCALE", 1019 "NOSHARD", 1020 ), 1021 tuple(), 1022 ), 1023 } 1024 1025 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1026 1027 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1028 1029 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1030 1031 CLONE_KEYWORDS = {"CLONE", "COPY"} 1032 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1033 1034 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1035 1036 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1037 1038 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1039 1040 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1041 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1042 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1043 1044 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1045 1046 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1047 1048 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 1049 1050 DISTINCT_TOKENS = {TokenType.DISTINCT} 1051 1052 NULL_TOKENS = {TokenType.NULL} 1053 1054 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1055 1056 STRICT_CAST = True 1057 1058 PREFIXED_PIVOT_COLUMNS = False 1059 IDENTIFY_PIVOT_STRINGS = False 1060 1061 LOG_DEFAULTS_TO_LN = False 1062 1063 # Whether ADD is present for each column added by ALTER TABLE 1064 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 
= True 1065 1066 # Whether the table sample clause expects CSV syntax 1067 TABLESAMPLE_CSV = False 1068 1069 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1070 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1071 1072 # Whether the TRIM function expects the characters to trim as its first argument 1073 TRIM_PATTERN_FIRST = False 1074 1075 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1076 STRING_ALIASES = False 1077 1078 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1079 MODIFIERS_ATTACHED_TO_UNION = True 1080 UNION_MODIFIERS = {"order", "limit", "offset"} 1081 1082 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1083 NO_PAREN_IF_COMMANDS = True 1084 1085 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1086 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1087 1088 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1089 # If this is True and '(' is not found, the keyword will be treated as an identifier 1090 VALUES_FOLLOWED_BY_PAREN = True 1091 1092 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1093 SUPPORTS_IMPLICIT_UNNEST = False 1094 1095 __slots__ = ( 1096 "error_level", 1097 "error_message_context", 1098 "max_errors", 1099 "dialect", 1100 "sql", 1101 "errors", 1102 "_tokens", 1103 "_index", 1104 "_curr", 1105 "_next", 1106 "_prev", 1107 "_prev_comments", 1108 ) 1109 1110 # Autofilled 1111 SHOW_TRIE: t.Dict = {} 1112 SET_TRIE: t.Dict = {} 1113 1114 def __init__( 1115 self, 1116 error_level: t.Optional[ErrorLevel] = None, 1117 error_message_context: int = 100, 1118 max_errors: int = 3, 1119 dialect: DialectType = None, 1120 ): 1121 from sqlglot.dialects import Dialect 1122 1123 self.error_level = error_level or ErrorLevel.IMMEDIATE 1124 self.error_message_context = error_message_context 1125 self.max_errors = max_errors 1126 self.dialect = Dialect.get_or_raise(dialect) 1127 self.reset() 1128 1129 def reset(self): 1130 self.sql = "" 1131 self.errors = [] 1132 self._tokens = [] 1133 self._index = 0 1134 self._curr = None 1135 self._next = None 1136 self._prev = None 1137 self._prev_comments = None 1138 1139 def parse( 1140 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1141 ) -> t.List[t.Optional[exp.Expression]]: 1142 """ 1143 Parses a list of tokens and returns a list of syntax trees, one tree 1144 per parsed SQL statement. 1145 1146 Args: 1147 raw_tokens: The list of tokens. 1148 sql: The original SQL string, used to produce helpful debug messages. 1149 1150 Returns: 1151 The list of the produced syntax trees. 1152 """ 1153 return self._parse( 1154 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1155 ) 1156 1157 def parse_into( 1158 self, 1159 expression_types: exp.IntoType, 1160 raw_tokens: t.List[Token], 1161 sql: t.Optional[str] = None, 1162 ) -> t.List[t.Optional[exp.Expression]]: 1163 """ 1164 Parses a list of tokens into a given Expression type. 
If a collection of Expression 1165 types is given instead, this method will try to parse the token list into each one 1166 of them, stopping at the first for which the parsing succeeds. 1167 1168 Args: 1169 expression_types: The expression type(s) to try and parse the token list into. 1170 raw_tokens: The list of tokens. 1171 sql: The original SQL string, used to produce helpful debug messages. 1172 1173 Returns: 1174 The target Expression. 1175 """ 1176 errors = [] 1177 for expression_type in ensure_list(expression_types): 1178 parser = self.EXPRESSION_PARSERS.get(expression_type) 1179 if not parser: 1180 raise TypeError(f"No parser registered for {expression_type}") 1181 1182 try: 1183 return self._parse(parser, raw_tokens, sql) 1184 except ParseError as e: 1185 e.errors[0]["into_expression"] = expression_type 1186 errors.append(e) 1187 1188 raise ParseError( 1189 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1190 errors=merge_errors(errors), 1191 ) from errors[-1] 1192 1193 def _parse( 1194 self, 1195 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1196 raw_tokens: t.List[Token], 1197 sql: t.Optional[str] = None, 1198 ) -> t.List[t.Optional[exp.Expression]]: 1199 self.reset() 1200 self.sql = sql or "" 1201 1202 total = len(raw_tokens) 1203 chunks: t.List[t.List[Token]] = [[]] 1204 1205 for i, token in enumerate(raw_tokens): 1206 if token.token_type == TokenType.SEMICOLON: 1207 if i < total - 1: 1208 chunks.append([]) 1209 else: 1210 chunks[-1].append(token) 1211 1212 expressions = [] 1213 1214 for tokens in chunks: 1215 self._index = -1 1216 self._tokens = tokens 1217 self._advance() 1218 1219 expressions.append(parse_method(self)) 1220 1221 if self._index < len(self._tokens): 1222 self.raise_error("Invalid expression / Unexpected token") 1223 1224 self.check_errors() 1225 1226 return expressions 1227 1228 def check_errors(self) -> None: 1229 """Logs or raises any found errors, depending on the chosen error level setting.""" 
        # WARN: log each recorded error; RAISE: bundle everything into one ParseError.
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        # Anchor the message to the nearest known token (or an empty one as last resort).
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1  # token.end is inclusive, so +1 to form a slice bound
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            # \033[4m ... \033[0m underline the offending span on ANSI terminals.
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f" {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        # IMMEDIATE raises on the spot; other levels accumulate for check_errors().
        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
1280 """ 1281 instance = exp_class(**kwargs) 1282 instance.add_comments(comments) if comments else self._add_comments(instance) 1283 return self.validate_expression(instance) 1284 1285 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1286 if expression and self._prev_comments: 1287 expression.add_comments(self._prev_comments) 1288 self._prev_comments = None 1289 1290 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1291 """ 1292 Validates an Expression, making sure that all its mandatory arguments are set. 1293 1294 Args: 1295 expression: The expression to validate. 1296 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1297 1298 Returns: 1299 The validated expression. 1300 """ 1301 if self.error_level != ErrorLevel.IGNORE: 1302 for error_message in expression.error_messages(args): 1303 self.raise_error(error_message) 1304 1305 return expression 1306 1307 def _find_sql(self, start: Token, end: Token) -> str: 1308 return self.sql[start.start : end.end + 1] 1309 1310 def _is_connected(self) -> bool: 1311 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1312 1313 def _advance(self, times: int = 1) -> None: 1314 self._index += times 1315 self._curr = seq_get(self._tokens, self._index) 1316 self._next = seq_get(self._tokens, self._index + 1) 1317 1318 if self._index > 0: 1319 self._prev = self._tokens[self._index - 1] 1320 self._prev_comments = self._prev.comments 1321 else: 1322 self._prev = None 1323 self._prev_comments = None 1324 1325 def _retreat(self, index: int) -> None: 1326 if index != self._index: 1327 self._advance(index - self._index) 1328 1329 def _warn_unsupported(self) -> None: 1330 if len(self._tokens) <= 1: 1331 return 1332 1333 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1334 # interested in emitting a warning for the one being currently processed. 
1335 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1336 1337 logger.warning( 1338 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1339 ) 1340 1341 def _parse_command(self) -> exp.Command: 1342 self._warn_unsupported() 1343 return self.expression( 1344 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1345 ) 1346 1347 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1348 start = self._prev 1349 exists = self._parse_exists() if allow_exists else None 1350 1351 self._match(TokenType.ON) 1352 1353 kind = self._match_set(self.CREATABLES) and self._prev 1354 if not kind: 1355 return self._parse_as_command(start) 1356 1357 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1358 this = self._parse_user_defined_function(kind=kind.token_type) 1359 elif kind.token_type == TokenType.TABLE: 1360 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1361 elif kind.token_type == TokenType.COLUMN: 1362 this = self._parse_column() 1363 else: 1364 this = self._parse_id_var() 1365 1366 self._match(TokenType.IS) 1367 1368 return self.expression( 1369 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1370 ) 1371 1372 def _parse_to_table( 1373 self, 1374 ) -> exp.ToTableProperty: 1375 table = self._parse_table_parts(schema=True) 1376 return self.expression(exp.ToTableProperty, this=table) 1377 1378 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1379 def _parse_ttl(self) -> exp.Expression: 1380 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1381 this = self._parse_bitwise() 1382 1383 if self._match_text_seq("DELETE"): 1384 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1385 if self._match_text_seq("RECOMPRESS"): 1386 return self.expression( 1387 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1388 ) 
1389 if self._match_text_seq("TO", "DISK"): 1390 return self.expression( 1391 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1392 ) 1393 if self._match_text_seq("TO", "VOLUME"): 1394 return self.expression( 1395 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1396 ) 1397 1398 return this 1399 1400 expressions = self._parse_csv(_parse_ttl_action) 1401 where = self._parse_where() 1402 group = self._parse_group() 1403 1404 aggregates = None 1405 if group and self._match(TokenType.SET): 1406 aggregates = self._parse_csv(self._parse_set_item) 1407 1408 return self.expression( 1409 exp.MergeTreeTTL, 1410 expressions=expressions, 1411 where=where, 1412 group=group, 1413 aggregates=aggregates, 1414 ) 1415 1416 def _parse_statement(self) -> t.Optional[exp.Expression]: 1417 if self._curr is None: 1418 return None 1419 1420 if self._match_set(self.STATEMENT_PARSERS): 1421 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1422 1423 if self._match_set(Tokenizer.COMMANDS): 1424 return self._parse_command() 1425 1426 expression = self._parse_expression() 1427 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1428 return self._parse_query_modifiers(expression) 1429 1430 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1431 start = self._prev 1432 temporary = self._match(TokenType.TEMPORARY) 1433 materialized = self._match_text_seq("MATERIALIZED") 1434 1435 kind = self._match_set(self.CREATABLES) and self._prev.text 1436 if not kind: 1437 return self._parse_as_command(start) 1438 1439 if_exists = exists or self._parse_exists() 1440 table = self._parse_table_parts( 1441 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1442 ) 1443 1444 if self._match(TokenType.L_PAREN, advance=False): 1445 expressions = self._parse_wrapped_csv(self._parse_types) 1446 else: 1447 expressions = None 1448 1449 return self.expression( 1450 exp.Drop, 1451 
comments=start.comments, 1452 exists=if_exists, 1453 this=table, 1454 expressions=expressions, 1455 kind=kind, 1456 temporary=temporary, 1457 materialized=materialized, 1458 cascade=self._match_text_seq("CASCADE"), 1459 constraints=self._match_text_seq("CONSTRAINTS"), 1460 purge=self._match_text_seq("PURGE"), 1461 ) 1462 1463 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1464 return ( 1465 self._match_text_seq("IF") 1466 and (not not_ or self._match(TokenType.NOT)) 1467 and self._match(TokenType.EXISTS) 1468 ) 1469 1470 def _parse_create(self) -> exp.Create | exp.Command: 1471 # Note: this can't be None because we've matched a statement parser 1472 start = self._prev 1473 comments = self._prev_comments 1474 1475 replace = ( 1476 start.token_type == TokenType.REPLACE 1477 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1478 or self._match_pair(TokenType.OR, TokenType.ALTER) 1479 ) 1480 1481 unique = self._match(TokenType.UNIQUE) 1482 1483 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1484 self._advance() 1485 1486 properties = None 1487 create_token = self._match_set(self.CREATABLES) and self._prev 1488 1489 if not create_token: 1490 # exp.Properties.Location.POST_CREATE 1491 properties = self._parse_properties() 1492 create_token = self._match_set(self.CREATABLES) and self._prev 1493 1494 if not properties or not create_token: 1495 return self._parse_as_command(start) 1496 1497 exists = self._parse_exists(not_=True) 1498 this = None 1499 expression: t.Optional[exp.Expression] = None 1500 indexes = None 1501 no_schema_binding = None 1502 begin = None 1503 end = None 1504 clone = None 1505 1506 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1507 nonlocal properties 1508 if properties and temp_props: 1509 properties.expressions.extend(temp_props.expressions) 1510 elif temp_props: 1511 properties = temp_props 1512 1513 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1514 this 
= self._parse_user_defined_function(kind=create_token.token_type) 1515 1516 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1517 extend_props(self._parse_properties()) 1518 1519 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1520 1521 if not expression: 1522 if self._match(TokenType.COMMAND): 1523 expression = self._parse_as_command(self._prev) 1524 else: 1525 begin = self._match(TokenType.BEGIN) 1526 return_ = self._match_text_seq("RETURN") 1527 1528 if self._match(TokenType.STRING, advance=False): 1529 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1530 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1531 expression = self._parse_string() 1532 extend_props(self._parse_properties()) 1533 else: 1534 expression = self._parse_statement() 1535 1536 end = self._match_text_seq("END") 1537 1538 if return_: 1539 expression = self.expression(exp.Return, this=expression) 1540 elif create_token.token_type == TokenType.INDEX: 1541 this = self._parse_index(index=self._parse_id_var()) 1542 elif create_token.token_type in self.DB_CREATABLES: 1543 table_parts = self._parse_table_parts( 1544 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1545 ) 1546 1547 # exp.Properties.Location.POST_NAME 1548 self._match(TokenType.COMMA) 1549 extend_props(self._parse_properties(before=True)) 1550 1551 this = self._parse_schema(this=table_parts) 1552 1553 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1554 extend_props(self._parse_properties()) 1555 1556 self._match(TokenType.ALIAS) 1557 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1558 # exp.Properties.Location.POST_ALIAS 1559 extend_props(self._parse_properties()) 1560 1561 if create_token.token_type == TokenType.SEQUENCE: 1562 expression = self._parse_types() 1563 extend_props(self._parse_properties()) 1564 else: 1565 expression = 
self._parse_ddl_select() 1566 1567 if create_token.token_type == TokenType.TABLE: 1568 # exp.Properties.Location.POST_EXPRESSION 1569 extend_props(self._parse_properties()) 1570 1571 indexes = [] 1572 while True: 1573 index = self._parse_index() 1574 1575 # exp.Properties.Location.POST_INDEX 1576 extend_props(self._parse_properties()) 1577 1578 if not index: 1579 break 1580 else: 1581 self._match(TokenType.COMMA) 1582 indexes.append(index) 1583 elif create_token.token_type == TokenType.VIEW: 1584 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1585 no_schema_binding = True 1586 1587 shallow = self._match_text_seq("SHALLOW") 1588 1589 if self._match_texts(self.CLONE_KEYWORDS): 1590 copy = self._prev.text.lower() == "copy" 1591 clone = self.expression( 1592 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1593 ) 1594 1595 if self._curr: 1596 return self._parse_as_command(start) 1597 1598 return self.expression( 1599 exp.Create, 1600 comments=comments, 1601 this=this, 1602 kind=create_token.text.upper(), 1603 replace=replace, 1604 unique=unique, 1605 expression=expression, 1606 exists=exists, 1607 properties=properties, 1608 indexes=indexes, 1609 no_schema_binding=no_schema_binding, 1610 begin=begin, 1611 end=end, 1612 clone=clone, 1613 ) 1614 1615 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1616 seq = exp.SequenceProperties() 1617 1618 options = [] 1619 index = self._index 1620 1621 while self._curr: 1622 if self._match_text_seq("INCREMENT"): 1623 self._match_text_seq("BY") 1624 self._match_text_seq("=") 1625 seq.set("increment", self._parse_term()) 1626 elif self._match_text_seq("MINVALUE"): 1627 seq.set("minvalue", self._parse_term()) 1628 elif self._match_text_seq("MAXVALUE"): 1629 seq.set("maxvalue", self._parse_term()) 1630 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1631 self._match_text_seq("=") 1632 seq.set("start", self._parse_term()) 1633 elif 
self._match_text_seq("CACHE"): 1634 # T-SQL allows empty CACHE which is initialized dynamically 1635 seq.set("cache", self._parse_number() or True) 1636 elif self._match_text_seq("OWNED", "BY"): 1637 # "OWNED BY NONE" is the default 1638 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1639 else: 1640 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1641 if opt: 1642 options.append(opt) 1643 else: 1644 break 1645 1646 seq.set("options", options if options else None) 1647 return None if self._index == index else seq 1648 1649 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1650 # only used for teradata currently 1651 self._match(TokenType.COMMA) 1652 1653 kwargs = { 1654 "no": self._match_text_seq("NO"), 1655 "dual": self._match_text_seq("DUAL"), 1656 "before": self._match_text_seq("BEFORE"), 1657 "default": self._match_text_seq("DEFAULT"), 1658 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1659 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1660 "after": self._match_text_seq("AFTER"), 1661 "minimum": self._match_texts(("MIN", "MINIMUM")), 1662 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1663 } 1664 1665 if self._match_texts(self.PROPERTY_PARSERS): 1666 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1667 try: 1668 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1669 except TypeError: 1670 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1671 1672 return None 1673 1674 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1675 return self._parse_wrapped_csv(self._parse_property) 1676 1677 def _parse_property(self) -> t.Optional[exp.Expression]: 1678 if self._match_texts(self.PROPERTY_PARSERS): 1679 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1680 1681 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1682 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, 
default=True) 1683 1684 if self._match_text_seq("COMPOUND", "SORTKEY"): 1685 return self._parse_sortkey(compound=True) 1686 1687 if self._match_text_seq("SQL", "SECURITY"): 1688 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1689 1690 index = self._index 1691 key = self._parse_column() 1692 1693 if not self._match(TokenType.EQ): 1694 self._retreat(index) 1695 return self._parse_sequence_properties() 1696 1697 return self.expression( 1698 exp.Property, 1699 this=key.to_dot() if isinstance(key, exp.Column) else key, 1700 value=self._parse_column() or self._parse_var(any_token=True), 1701 ) 1702 1703 def _parse_stored(self) -> exp.FileFormatProperty: 1704 self._match(TokenType.ALIAS) 1705 1706 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1707 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1708 1709 return self.expression( 1710 exp.FileFormatProperty, 1711 this=( 1712 self.expression( 1713 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1714 ) 1715 if input_format or output_format 1716 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1717 ), 1718 ) 1719 1720 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1721 self._match(TokenType.EQ) 1722 self._match(TokenType.ALIAS) 1723 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1724 1725 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1726 properties = [] 1727 while True: 1728 if before: 1729 prop = self._parse_property_before() 1730 else: 1731 prop = self._parse_property() 1732 if not prop: 1733 break 1734 for p in ensure_list(prop): 1735 properties.append(p) 1736 1737 if properties: 1738 return self.expression(exp.Properties, expressions=properties) 1739 1740 return None 1741 1742 def _parse_fallback(self, no: bool = False) -> 
exp.FallbackProperty: 1743 return self.expression( 1744 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1745 ) 1746 1747 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1748 if self._index >= 2: 1749 pre_volatile_token = self._tokens[self._index - 2] 1750 else: 1751 pre_volatile_token = None 1752 1753 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1754 return exp.VolatileProperty() 1755 1756 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1757 1758 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1759 self._match_pair(TokenType.EQ, TokenType.ON) 1760 1761 prop = self.expression(exp.WithSystemVersioningProperty) 1762 if self._match(TokenType.L_PAREN): 1763 self._match_text_seq("HISTORY_TABLE", "=") 1764 prop.set("this", self._parse_table_parts()) 1765 1766 if self._match(TokenType.COMMA): 1767 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1768 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1769 1770 self._match_r_paren() 1771 1772 return prop 1773 1774 def _parse_with_property( 1775 self, 1776 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1777 if self._match(TokenType.L_PAREN, advance=False): 1778 return self._parse_wrapped_properties() 1779 1780 if self._match_text_seq("JOURNAL"): 1781 return self._parse_withjournaltable() 1782 1783 if self._match_text_seq("DATA"): 1784 return self._parse_withdata(no=False) 1785 elif self._match_text_seq("NO", "DATA"): 1786 return self._parse_withdata(no=True) 1787 1788 if not self._next: 1789 return None 1790 1791 return self._parse_withisolatedloading() 1792 1793 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1794 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1795 self._match(TokenType.EQ) 1796 1797 user = self._parse_id_var() 1798 self._match(TokenType.PARAMETER) 1799 host = self._parse_id_var() 
or (self._match(TokenType.MOD) and self._prev.text) 1800 1801 if not user or not host: 1802 return None 1803 1804 return exp.DefinerProperty(this=f"{user}@{host}") 1805 1806 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1807 self._match(TokenType.TABLE) 1808 self._match(TokenType.EQ) 1809 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1810 1811 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1812 return self.expression(exp.LogProperty, no=no) 1813 1814 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1815 return self.expression(exp.JournalProperty, **kwargs) 1816 1817 def _parse_checksum(self) -> exp.ChecksumProperty: 1818 self._match(TokenType.EQ) 1819 1820 on = None 1821 if self._match(TokenType.ON): 1822 on = True 1823 elif self._match_text_seq("OFF"): 1824 on = False 1825 1826 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1827 1828 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 1829 return self.expression( 1830 exp.Cluster, 1831 expressions=( 1832 self._parse_wrapped_csv(self._parse_ordered) 1833 if wrapped 1834 else self._parse_csv(self._parse_ordered) 1835 ), 1836 ) 1837 1838 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1839 self._match_text_seq("BY") 1840 1841 self._match_l_paren() 1842 expressions = self._parse_csv(self._parse_column) 1843 self._match_r_paren() 1844 1845 if self._match_text_seq("SORTED", "BY"): 1846 self._match_l_paren() 1847 sorted_by = self._parse_csv(self._parse_ordered) 1848 self._match_r_paren() 1849 else: 1850 sorted_by = None 1851 1852 self._match(TokenType.INTO) 1853 buckets = self._parse_number() 1854 self._match_text_seq("BUCKETS") 1855 1856 return self.expression( 1857 exp.ClusteredByProperty, 1858 expressions=expressions, 1859 sorted_by=sorted_by, 1860 buckets=buckets, 1861 ) 1862 1863 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1864 if not 
self._match_text_seq("GRANTS"): 1865 self._retreat(self._index - 1) 1866 return None 1867 1868 return self.expression(exp.CopyGrantsProperty) 1869 1870 def _parse_freespace(self) -> exp.FreespaceProperty: 1871 self._match(TokenType.EQ) 1872 return self.expression( 1873 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1874 ) 1875 1876 def _parse_mergeblockratio( 1877 self, no: bool = False, default: bool = False 1878 ) -> exp.MergeBlockRatioProperty: 1879 if self._match(TokenType.EQ): 1880 return self.expression( 1881 exp.MergeBlockRatioProperty, 1882 this=self._parse_number(), 1883 percent=self._match(TokenType.PERCENT), 1884 ) 1885 1886 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1887 1888 def _parse_datablocksize( 1889 self, 1890 default: t.Optional[bool] = None, 1891 minimum: t.Optional[bool] = None, 1892 maximum: t.Optional[bool] = None, 1893 ) -> exp.DataBlocksizeProperty: 1894 self._match(TokenType.EQ) 1895 size = self._parse_number() 1896 1897 units = None 1898 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1899 units = self._prev.text 1900 1901 return self.expression( 1902 exp.DataBlocksizeProperty, 1903 size=size, 1904 units=units, 1905 default=default, 1906 minimum=minimum, 1907 maximum=maximum, 1908 ) 1909 1910 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1911 self._match(TokenType.EQ) 1912 always = self._match_text_seq("ALWAYS") 1913 manual = self._match_text_seq("MANUAL") 1914 never = self._match_text_seq("NEVER") 1915 default = self._match_text_seq("DEFAULT") 1916 1917 autotemp = None 1918 if self._match_text_seq("AUTOTEMP"): 1919 autotemp = self._parse_schema() 1920 1921 return self.expression( 1922 exp.BlockCompressionProperty, 1923 always=always, 1924 manual=manual, 1925 never=never, 1926 default=default, 1927 autotemp=autotemp, 1928 ) 1929 1930 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1931 no = 
self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        # At most one of the FOR variants can match; each attempt consumes on success only.
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING clause: kind, optional target, FOR/IN, lock type, OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects (not ROW) are followed by a name reference.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parse an optional PARTITION BY <exprs> list; returns [] when absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a partition bound: IN (...), FROM (...) TO (...), or WITH (MODULUS n, REMAINDER m)."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE / MAXVALUE are keywords here, not column references.
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parse PARTITION OF <table> { DEFAULT | FOR VALUES <bound-spec> }."""
        if not self._match_text_seq("OF"):
            # The preceding keyword was consumed by the caller; step back so it
            # can be retried by another parser.
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse the tail of WITH [NO] DATA [AND [NO] STATISTICS]; `no` comes from the caller."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse the SQL keyword of a CONTAINS SQL routine characteristic."""
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse the SQL DATA keywords of a MODIFIES SQL DATA routine characteristic."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        """Parse what may follow NO: PRIMARY INDEX or SQL."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse what follows ON: COMMIT PRESERVE/DELETE ROWS, else a generic ON property."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse the SQL DATA keywords of a READS SQL DATA routine characteristic."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse DISTKEY(<id>)."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse LIKE <table> [{INCLUDING | EXCLUDING} <option> ...]."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse SORTKEY(<ids>); `compound` marks a leading COMPOUND consumed by the caller."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse CHARACTER SET [=] <value>; `default` marks a leading DEFAULT."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse REMOTE WITH CONNECTION <table parts>."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: a scalar type, TABLE<...>, or TABLE (<schema>)."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        """Parse DESCRIBE [<kind>] [EXTENDED] <table> [<properties>]."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        extended = self._match_text_seq("EXTENDED")
        this = self._parse_table(schema=True)
properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions
        )

    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement, including INSERT ... DIRECTORY and INSERT OR <alt>."""
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. INSERT OR REPLACE — the alternative keyword set is dialect-defined.
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = self._parse_table(schema=True) if not is_function else self._parse_function()

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse KILL [CONNECTION | QUERY] <expr>."""
        # The conditional's test runs first, so _prev is the keyword just matched.
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT ... / ON DUPLICATE KEY ... following an INSERT."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        # After an UPDATE action, consume SET and the assignment list.
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse RETURNING <exprs> [INTO <target>]."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ROW FORMAT ... when the ROW token was already consumed."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ROW FORMAT SERDE '<name>' [...] or ROW FORMAT DELIMITED [...]."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if
self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_properties()
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse LOAD DATA [LOCAL] INPATH ...; anything else falls back to a raw Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table>."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse PARTITION (<exprs>)."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse one VALUES row, with or without surrounding parentheses."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        return self.expression(exp.Tuple, expressions=[self._parse_expression()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list."""
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a query: [WITH ...] SELECT ..., a parenthesized/nested query, or VALUES."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                # NOTE(review): raise_error presumably returns under lenient error
                # levels, so a fallback value is still produced — confirm.
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH (CTE) clause."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive =
self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # After a separator, a redundant WITH may appear and is consumed.
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse one CTE: <alias> [(cols)] AS [NOT MATERIALIZED | MATERIALIZED] (<statement>)."""
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse [AS] <alias> [(col, ...)] into a TableAlias, or None when absent."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # A paren that yields no columns wasn't a column list — rewind past it.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in a Subquery, attaching pivots and (optionally) an alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite ON-less joins that reference an earlier table's column as explicit UNNESTs."""
        from sqlglot.optimizer.normalize_identifiers import (
            normalize_identifiers as _norm,
        )

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals and trailing modifier clauses to a query or table node."""
        if isinstance(this, (exp.Query, exp.Table)):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # A LIMIT may carry an embedded OFFSET; hoist it to its own node.
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions =
expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args:
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint comment `/*+ ... */` into a Hint node."""
        if self._match(TokenType.HINT):
            hints = []
            # Hints can be several comma-separated lists; collect until none parse.
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse INTO [TEMPORARY | UNLOGGED] [TABLE] <table>."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause; `skip_from_token` when FROM was already consumed."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # Scan the raw token stream to the matching close paren; the pattern
            # is kept as opaque text rather than parsed.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL ... or CROSS/OUTER APPLY ...."""
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and
self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: an UNNEST, a function call, or a (possibly dotted) name.
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Return the (method, side, kind) tokens of a join, each possibly None."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one join (comma join, JOIN, or CROSS/OUTER APPLY), or None."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # No JOIN keyword followed the modifiers — undo and try APPLY forms.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # Handle a nested join whose ON/USING binds to the outer join.
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an expression optionally followed by an operator-class identifier."""
        this = self._parse_conjunction()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def
_parse_index_params(self) -> exp.IndexParameters:
        """Parse index options: USING, columns, INCLUDE, partitioning, storage, tablespace, WHERE."""
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
        )

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition; `index` is a pre-parsed name when given by the caller."""
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) table hints or MySQL-style index hints."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one component of a (possibly dotted) table reference."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a dotted table reference (catalog.db.table), with optional trailing wildcard."""
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        if is_db_reference:
            # For database references the last parsed part is the db, not a table.
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral/unnest/values/subquery, or a plain table reference."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias =
self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3104 if alias: 3105 this.set("alias", alias) 3106 3107 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3108 return self.expression( 3109 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3110 ) 3111 3112 this.set("hints", self._parse_table_hints()) 3113 3114 if not this.args.get("pivots"): 3115 this.set("pivots", self._parse_pivots()) 3116 3117 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3118 table_sample = self._parse_table_sample() 3119 3120 if table_sample: 3121 table_sample.set("this", this) 3122 this = table_sample 3123 3124 if joins: 3125 for join in iter(self._parse_join, None): 3126 this.append("joins", join) 3127 3128 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3129 this.set("ordinality", True) 3130 this.set("alias", self._parse_table_alias()) 3131 3132 return this 3133 3134 def _parse_version(self) -> t.Optional[exp.Version]: 3135 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3136 this = "TIMESTAMP" 3137 elif self._match(TokenType.VERSION_SNAPSHOT): 3138 this = "VERSION" 3139 else: 3140 return None 3141 3142 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3143 kind = self._prev.text.upper() 3144 start = self._parse_bitwise() 3145 self._match_texts(("TO", "AND")) 3146 end = self._parse_bitwise() 3147 expression: t.Optional[exp.Expression] = self.expression( 3148 exp.Tuple, expressions=[start, end] 3149 ) 3150 elif self._match_text_seq("CONTAINED", "IN"): 3151 kind = "CONTAINED IN" 3152 expression = self.expression( 3153 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3154 ) 3155 elif self._match(TokenType.ALL): 3156 kind = "ALL" 3157 expression = None 3158 else: 3159 self._match_text_seq("AS", "OF") 3160 kind = "AS OF" 3161 expression = self._parse_type() 3162 3163 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3164 3165 def _parse_unnest(self, with_alias: bool 
= True) -> t.Optional[exp.Unnest]: 3166 if not self._match(TokenType.UNNEST): 3167 return None 3168 3169 expressions = self._parse_wrapped_csv(self._parse_equality) 3170 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3171 3172 alias = self._parse_table_alias() if with_alias else None 3173 3174 if alias: 3175 if self.dialect.UNNEST_COLUMN_ONLY: 3176 if alias.args.get("columns"): 3177 self.raise_error("Unexpected extra column alias in unnest.") 3178 3179 alias.set("columns", [alias.this]) 3180 alias.set("this", None) 3181 3182 columns = alias.args.get("columns") or [] 3183 if offset and len(expressions) < len(columns): 3184 offset = columns.pop() 3185 3186 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3187 self._match(TokenType.ALIAS) 3188 offset = self._parse_id_var( 3189 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3190 ) or exp.to_identifier("offset") 3191 3192 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3193 3194 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3195 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3196 if not is_derived and not self._match_text_seq("VALUES"): 3197 return None 3198 3199 expressions = self._parse_csv(self._parse_value) 3200 alias = self._parse_table_alias() 3201 3202 if is_derived: 3203 self._match_r_paren() 3204 3205 return self.expression( 3206 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3207 ) 3208 3209 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3210 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3211 as_modifier and self._match_text_seq("USING", "SAMPLE") 3212 ): 3213 return None 3214 3215 bucket_numerator = None 3216 bucket_denominator = None 3217 bucket_field = None 3218 percent = None 3219 size = None 3220 seed = None 3221 3222 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3223 matched_l_paren = 
self._match(TokenType.L_PAREN) 3224 3225 if self.TABLESAMPLE_CSV: 3226 num = None 3227 expressions = self._parse_csv(self._parse_primary) 3228 else: 3229 expressions = None 3230 num = ( 3231 self._parse_factor() 3232 if self._match(TokenType.NUMBER, advance=False) 3233 else self._parse_primary() or self._parse_placeholder() 3234 ) 3235 3236 if self._match_text_seq("BUCKET"): 3237 bucket_numerator = self._parse_number() 3238 self._match_text_seq("OUT", "OF") 3239 bucket_denominator = bucket_denominator = self._parse_number() 3240 self._match(TokenType.ON) 3241 bucket_field = self._parse_field() 3242 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3243 percent = num 3244 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3245 size = num 3246 else: 3247 percent = num 3248 3249 if matched_l_paren: 3250 self._match_r_paren() 3251 3252 if self._match(TokenType.L_PAREN): 3253 method = self._parse_var(upper=True) 3254 seed = self._match(TokenType.COMMA) and self._parse_number() 3255 self._match_r_paren() 3256 elif self._match_texts(("SEED", "REPEATABLE")): 3257 seed = self._parse_wrapped(self._parse_number) 3258 3259 return self.expression( 3260 exp.TableSample, 3261 expressions=expressions, 3262 method=method, 3263 bucket_numerator=bucket_numerator, 3264 bucket_denominator=bucket_denominator, 3265 bucket_field=bucket_field, 3266 percent=percent, 3267 size=size, 3268 seed=seed, 3269 ) 3270 3271 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3272 return list(iter(self._parse_pivot, None)) or None 3273 3274 # https://duckdb.org/docs/sql/statements/pivot 3275 def _parse_simplified_pivot(self) -> exp.Pivot: 3276 def _parse_on() -> t.Optional[exp.Expression]: 3277 this = self._parse_bitwise() 3278 return self._parse_in(this) if self._match(TokenType.IN) else this 3279 3280 this = self._parse_table() 3281 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3282 using = self._match(TokenType.USING) and 
self._parse_csv( 3283 lambda: self._parse_alias(self._parse_function()) 3284 ) 3285 group = self._parse_group() 3286 return self.expression( 3287 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3288 ) 3289 3290 def _parse_pivot_in(self) -> exp.In: 3291 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3292 this = self._parse_conjunction() 3293 3294 self._match(TokenType.ALIAS) 3295 alias = self._parse_field() 3296 if alias: 3297 return self.expression(exp.PivotAlias, this=this, alias=alias) 3298 3299 return this 3300 3301 value = self._parse_column() 3302 3303 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3304 self.raise_error("Expecting IN (") 3305 3306 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3307 3308 self._match_r_paren() 3309 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3310 3311 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3312 index = self._index 3313 include_nulls = None 3314 3315 if self._match(TokenType.PIVOT): 3316 unpivot = False 3317 elif self._match(TokenType.UNPIVOT): 3318 unpivot = True 3319 3320 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3321 if self._match_text_seq("INCLUDE", "NULLS"): 3322 include_nulls = True 3323 elif self._match_text_seq("EXCLUDE", "NULLS"): 3324 include_nulls = False 3325 else: 3326 return None 3327 3328 expressions = [] 3329 3330 if not self._match(TokenType.L_PAREN): 3331 self._retreat(index) 3332 return None 3333 3334 if unpivot: 3335 expressions = self._parse_csv(self._parse_column) 3336 else: 3337 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3338 3339 if not expressions: 3340 self.raise_error("Failed to parse PIVOT's aggregation list") 3341 3342 if not self._match(TokenType.FOR): 3343 self.raise_error("Expecting FOR") 3344 3345 field = self._parse_pivot_in() 3346 3347 self._match_r_paren() 3348 3349 pivot = self.expression( 
3350 exp.Pivot, 3351 expressions=expressions, 3352 field=field, 3353 unpivot=unpivot, 3354 include_nulls=include_nulls, 3355 ) 3356 3357 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3358 pivot.set("alias", self._parse_table_alias()) 3359 3360 if not unpivot: 3361 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3362 3363 columns: t.List[exp.Expression] = [] 3364 for fld in pivot.args["field"].expressions: 3365 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3366 for name in names: 3367 if self.PREFIXED_PIVOT_COLUMNS: 3368 name = f"{name}_{field_name}" if name else field_name 3369 else: 3370 name = f"{field_name}_{name}" if name else field_name 3371 3372 columns.append(exp.to_identifier(name)) 3373 3374 pivot.set("columns", columns) 3375 3376 return pivot 3377 3378 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3379 return [agg.alias for agg in aggregations] 3380 3381 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3382 if not skip_where_token and not self._match(TokenType.PREWHERE): 3383 return None 3384 3385 return self.expression( 3386 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3387 ) 3388 3389 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3390 if not skip_where_token and not self._match(TokenType.WHERE): 3391 return None 3392 3393 return self.expression( 3394 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3395 ) 3396 3397 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3398 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3399 return None 3400 3401 elements = defaultdict(list) 3402 3403 if self._match(TokenType.ALL): 3404 return self.expression(exp.Group, all=True) 3405 3406 while True: 3407 expressions = self._parse_csv(self._parse_conjunction) 3408 if 
expressions: 3409 elements["expressions"].extend(expressions) 3410 3411 grouping_sets = self._parse_grouping_sets() 3412 if grouping_sets: 3413 elements["grouping_sets"].extend(grouping_sets) 3414 3415 rollup = None 3416 cube = None 3417 totals = None 3418 3419 index = self._index 3420 with_ = self._match(TokenType.WITH) 3421 if self._match(TokenType.ROLLUP): 3422 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3423 elements["rollup"].extend(ensure_list(rollup)) 3424 3425 if self._match(TokenType.CUBE): 3426 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3427 elements["cube"].extend(ensure_list(cube)) 3428 3429 if self._match_text_seq("TOTALS"): 3430 totals = True 3431 elements["totals"] = True # type: ignore 3432 3433 if not (grouping_sets or rollup or cube or totals): 3434 if with_: 3435 self._retreat(index) 3436 break 3437 3438 return self.expression(exp.Group, **elements) # type: ignore 3439 3440 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3441 if not self._match(TokenType.GROUPING_SETS): 3442 return None 3443 3444 return self._parse_wrapped_csv(self._parse_grouping_set) 3445 3446 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3447 if self._match(TokenType.L_PAREN): 3448 grouping_set = self._parse_csv(self._parse_column) 3449 self._match_r_paren() 3450 return self.expression(exp.Tuple, expressions=grouping_set) 3451 3452 return self._parse_column() 3453 3454 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3455 if not skip_having_token and not self._match(TokenType.HAVING): 3456 return None 3457 return self.expression(exp.Having, this=self._parse_conjunction()) 3458 3459 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3460 if not self._match(TokenType.QUALIFY): 3461 return None 3462 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3463 3464 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3465 if 
skip_start_token: 3466 start = None 3467 elif self._match(TokenType.START_WITH): 3468 start = self._parse_conjunction() 3469 else: 3470 return None 3471 3472 self._match(TokenType.CONNECT_BY) 3473 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3474 exp.Prior, this=self._parse_bitwise() 3475 ) 3476 connect = self._parse_conjunction() 3477 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3478 3479 if not start and self._match(TokenType.START_WITH): 3480 start = self._parse_conjunction() 3481 3482 return self.expression(exp.Connect, start=start, connect=connect) 3483 3484 def _parse_name_as_expression(self) -> exp.Alias: 3485 return self.expression( 3486 exp.Alias, 3487 alias=self._parse_id_var(any_token=True), 3488 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3489 ) 3490 3491 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3492 if self._match_text_seq("INTERPOLATE"): 3493 return self._parse_wrapped_csv(self._parse_name_as_expression) 3494 return None 3495 3496 def _parse_order( 3497 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3498 ) -> t.Optional[exp.Expression]: 3499 siblings = None 3500 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3501 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3502 return this 3503 3504 siblings = True 3505 3506 return self.expression( 3507 exp.Order, 3508 this=this, 3509 expressions=self._parse_csv(self._parse_ordered), 3510 interpolate=self._parse_interpolate(), 3511 siblings=siblings, 3512 ) 3513 3514 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3515 if not self._match(token): 3516 return None 3517 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3518 3519 def _parse_ordered( 3520 self, parse_method: t.Optional[t.Callable] = None 3521 ) -> t.Optional[exp.Ordered]: 3522 this = parse_method() if parse_method else self._parse_conjunction() 3523 if not this: 
3524 return None 3525 3526 asc = self._match(TokenType.ASC) 3527 desc = self._match(TokenType.DESC) or (asc and False) 3528 3529 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3530 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3531 3532 nulls_first = is_nulls_first or False 3533 explicitly_null_ordered = is_nulls_first or is_nulls_last 3534 3535 if ( 3536 not explicitly_null_ordered 3537 and ( 3538 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3539 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3540 ) 3541 and self.dialect.NULL_ORDERING != "nulls_are_last" 3542 ): 3543 nulls_first = True 3544 3545 if self._match_text_seq("WITH", "FILL"): 3546 with_fill = self.expression( 3547 exp.WithFill, 3548 **{ # type: ignore 3549 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3550 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3551 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3552 }, 3553 ) 3554 else: 3555 with_fill = None 3556 3557 return self.expression( 3558 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3559 ) 3560 3561 def _parse_limit( 3562 self, 3563 this: t.Optional[exp.Expression] = None, 3564 top: bool = False, 3565 skip_limit_token: bool = False, 3566 ) -> t.Optional[exp.Expression]: 3567 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3568 comments = self._prev_comments 3569 if top: 3570 limit_paren = self._match(TokenType.L_PAREN) 3571 expression = self._parse_term() if limit_paren else self._parse_number() 3572 3573 if limit_paren: 3574 self._match_r_paren() 3575 else: 3576 expression = self._parse_term() 3577 3578 if self._match(TokenType.COMMA): 3579 offset = expression 3580 expression = self._parse_term() 3581 else: 3582 offset = None 3583 3584 limit_exp = self.expression( 3585 exp.Limit, 3586 this=this, 3587 expression=expression, 3588 offset=offset, 3589 comments=comments, 3590 
expressions=self._parse_limit_by(), 3591 ) 3592 3593 return limit_exp 3594 3595 if self._match(TokenType.FETCH): 3596 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3597 direction = self._prev.text.upper() if direction else "FIRST" 3598 3599 count = self._parse_field(tokens=self.FETCH_TOKENS) 3600 percent = self._match(TokenType.PERCENT) 3601 3602 self._match_set((TokenType.ROW, TokenType.ROWS)) 3603 3604 only = self._match_text_seq("ONLY") 3605 with_ties = self._match_text_seq("WITH", "TIES") 3606 3607 if only and with_ties: 3608 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3609 3610 return self.expression( 3611 exp.Fetch, 3612 direction=direction, 3613 count=count, 3614 percent=percent, 3615 with_ties=with_ties, 3616 ) 3617 3618 return this 3619 3620 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3621 if not self._match(TokenType.OFFSET): 3622 return this 3623 3624 count = self._parse_term() 3625 self._match_set((TokenType.ROW, TokenType.ROWS)) 3626 3627 return self.expression( 3628 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3629 ) 3630 3631 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3632 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3633 3634 def _parse_locks(self) -> t.List[exp.Lock]: 3635 locks = [] 3636 while True: 3637 if self._match_text_seq("FOR", "UPDATE"): 3638 update = True 3639 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3640 "LOCK", "IN", "SHARE", "MODE" 3641 ): 3642 update = False 3643 else: 3644 break 3645 3646 expressions = None 3647 if self._match_text_seq("OF"): 3648 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3649 3650 wait: t.Optional[bool | exp.Expression] = None 3651 if self._match_text_seq("NOWAIT"): 3652 wait = True 3653 elif self._match_text_seq("WAIT"): 3654 wait = self._parse_primary() 3655 elif 
self._match_text_seq("SKIP", "LOCKED"): 3656 wait = False 3657 3658 locks.append( 3659 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3660 ) 3661 3662 return locks 3663 3664 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3665 while this and self._match_set(self.SET_OPERATIONS): 3666 token_type = self._prev.token_type 3667 3668 if token_type == TokenType.UNION: 3669 operation = exp.Union 3670 elif token_type == TokenType.EXCEPT: 3671 operation = exp.Except 3672 else: 3673 operation = exp.Intersect 3674 3675 comments = self._prev.comments 3676 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3677 by_name = self._match_text_seq("BY", "NAME") 3678 expression = self._parse_select(nested=True, parse_set_operation=False) 3679 3680 this = self.expression( 3681 operation, 3682 comments=comments, 3683 this=this, 3684 distinct=distinct, 3685 by_name=by_name, 3686 expression=expression, 3687 ) 3688 3689 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3690 expression = this.expression 3691 3692 if expression: 3693 for arg in self.UNION_MODIFIERS: 3694 expr = expression.args.get(arg) 3695 if expr: 3696 this.set(arg, expr.pop()) 3697 3698 return this 3699 3700 def _parse_expression(self) -> t.Optional[exp.Expression]: 3701 return self._parse_alias(self._parse_conjunction()) 3702 3703 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3704 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3705 3706 def _parse_equality(self) -> t.Optional[exp.Expression]: 3707 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3708 3709 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3710 return self._parse_tokens(self._parse_range, self.COMPARISON) 3711 3712 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3713 this = this or self._parse_bitwise() 3714 negate = 
self._match(TokenType.NOT) 3715 3716 if self._match_set(self.RANGE_PARSERS): 3717 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3718 if not expression: 3719 return this 3720 3721 this = expression 3722 elif self._match(TokenType.ISNULL): 3723 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3724 3725 # Postgres supports ISNULL and NOTNULL for conditions. 3726 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3727 if self._match(TokenType.NOTNULL): 3728 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3729 this = self.expression(exp.Not, this=this) 3730 3731 if negate: 3732 this = self.expression(exp.Not, this=this) 3733 3734 if self._match(TokenType.IS): 3735 this = self._parse_is(this) 3736 3737 return this 3738 3739 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3740 index = self._index - 1 3741 negate = self._match(TokenType.NOT) 3742 3743 if self._match_text_seq("DISTINCT", "FROM"): 3744 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3745 return self.expression(klass, this=this, expression=self._parse_bitwise()) 3746 3747 expression = self._parse_null() or self._parse_boolean() 3748 if not expression: 3749 self._retreat(index) 3750 return None 3751 3752 this = self.expression(exp.Is, this=this, expression=expression) 3753 return self.expression(exp.Not, this=this) if negate else this 3754 3755 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3756 unnest = self._parse_unnest(with_alias=False) 3757 if unnest: 3758 this = self.expression(exp.In, this=this, unnest=unnest) 3759 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3760 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3761 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3762 3763 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 3764 this = self.expression(exp.In, this=this, 
query=expressions[0]) 3765 else: 3766 this = self.expression(exp.In, this=this, expressions=expressions) 3767 3768 if matched_l_paren: 3769 self._match_r_paren(this) 3770 elif not self._match(TokenType.R_BRACKET, expression=this): 3771 self.raise_error("Expecting ]") 3772 else: 3773 this = self.expression(exp.In, this=this, field=self._parse_field()) 3774 3775 return this 3776 3777 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3778 low = self._parse_bitwise() 3779 self._match(TokenType.AND) 3780 high = self._parse_bitwise() 3781 return self.expression(exp.Between, this=this, low=low, high=high) 3782 3783 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3784 if not self._match(TokenType.ESCAPE): 3785 return this 3786 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3787 3788 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3789 index = self._index 3790 3791 if not self._match(TokenType.INTERVAL) and match_interval: 3792 return None 3793 3794 if self._match(TokenType.STRING, advance=False): 3795 this = self._parse_primary() 3796 else: 3797 this = self._parse_term() 3798 3799 if not this or ( 3800 isinstance(this, exp.Column) 3801 and not this.table 3802 and not this.this.quoted 3803 and this.name.upper() == "IS" 3804 ): 3805 self._retreat(index) 3806 return None 3807 3808 unit = self._parse_function() or ( 3809 not self._match(TokenType.ALIAS, advance=False) 3810 and self._parse_var(any_token=True, upper=True) 3811 ) 3812 3813 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3814 # each INTERVAL expression into this canonical form so it's easy to transpile 3815 if this and this.is_number: 3816 this = exp.Literal.string(this.name) 3817 elif this and this.is_string: 3818 parts = this.name.split() 3819 3820 if len(parts) == 2: 3821 if unit: 3822 # This is not actually a unit, it's something else (e.g. 
a "window side") 3823 unit = None 3824 self._retreat(self._index - 1) 3825 3826 this = exp.Literal.string(parts[0]) 3827 unit = self.expression(exp.Var, this=parts[1].upper()) 3828 3829 return self.expression(exp.Interval, this=this, unit=unit) 3830 3831 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3832 this = self._parse_term() 3833 3834 while True: 3835 if self._match_set(self.BITWISE): 3836 this = self.expression( 3837 self.BITWISE[self._prev.token_type], 3838 this=this, 3839 expression=self._parse_term(), 3840 ) 3841 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3842 this = self.expression( 3843 exp.DPipe, 3844 this=this, 3845 expression=self._parse_term(), 3846 safe=not self.dialect.STRICT_STRING_CONCAT, 3847 ) 3848 elif self._match(TokenType.DQMARK): 3849 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3850 elif self._match_pair(TokenType.LT, TokenType.LT): 3851 this = self.expression( 3852 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3853 ) 3854 elif self._match_pair(TokenType.GT, TokenType.GT): 3855 this = self.expression( 3856 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3857 ) 3858 else: 3859 break 3860 3861 return this 3862 3863 def _parse_term(self) -> t.Optional[exp.Expression]: 3864 return self._parse_tokens(self._parse_factor, self.TERM) 3865 3866 def _parse_factor(self) -> t.Optional[exp.Expression]: 3867 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3868 this = parse_method() 3869 3870 while self._match_set(self.FACTOR): 3871 this = self.expression( 3872 self.FACTOR[self._prev.token_type], 3873 this=this, 3874 comments=self._prev_comments, 3875 expression=parse_method(), 3876 ) 3877 if isinstance(this, exp.Div): 3878 this.args["typed"] = self.dialect.TYPED_DIVISION 3879 this.args["safe"] = self.dialect.SAFE_DIVISION 3880 3881 return this 3882 3883 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3884 
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse a unary-operator expression; otherwise fall through to a typed
        expression, optionally wrapped in AT TIME ZONE."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL literal, a cast-style typed literal (e.g. DATE 'x'),
        or fall back to a column expression.

        Args:
            parse_interval: when False, skip the INTERVAL fast path (used by
                struct-field parsing, where INTERVAL is a field type, not a literal).
        """
        interval = parse_interval and self._parse_interval()
        if interval:
            # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
            while True:
                index = self._index
                self._match(TokenType.PLUS)

                # Only keep folding while another 'val' follows; otherwise undo the
                # optional PLUS we may have consumed and stop.
                if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
                    self._retreat(index)
                    break

                interval = self.expression(  # type: ignore
                    exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
                )

            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01' — dialect may have a dedicated literal parser
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # A bare type name followed by a non-literal was a misparse: rewind
                # and treat the whole thing as a column instead.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one parenthesized type parameter, e.g. the 10 in DECIMAL(10, 2),
        optionally followed by a variable (e.g. a unit keyword)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested (ARRAY<...>, STRUCT<...>), enum,
        aggregate, timestamp-with-timezone, INTERVAL span and UDT forms.

        Args:
            check_func: when True, a parenthesized type followed by a string literal
                is rejected (it was actually a function call, not a type).
            schema: propagated into nested type parses (column-def context).
            allow_identifiers: allow a plain identifier to be re-tokenized as a type
                name or accepted as a user-defined type.
        """
        index = self._index

        # Teradata's SYSUDTLIB.<type> prefix.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if identifier:
                # Re-tokenize the identifier: quoted type names (e.g. "INT") come
                # through as VAR but may still denote a known type token.
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                # e.g. ClickHouse AggregateFunction(func, type, ...): first argument
                # is a function or identifier, the rest are types.
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Not a valid parameterized type — rewind to before the type token.
                self._retreat(index)
                return None

            # TYPE(...) could still turn out to be a function call; see check below.
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values after the type, e.g. ARRAY<INT>[1, 2].
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                # INTERVAL <unit> TO <unit>, e.g. INTERVAL DAY TO SECOND.
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))

        if maybe_func and check_func:
            # If a string literal follows TYPE(...), this was a function call after
            # all — give up on the type interpretation entirely.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] pairs wrap the type in ARRAY, e.g. INT[][] -> ARRAY<ARRAY<INT>>.
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one STRUCT field, i.e. [name [:]] type, as a column definition.

        Args:
            type_required: when True and nothing beyond a bare name was consumed,
                rewind and re-parse the field as a type instead.
        """
        index = self._index
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        column_def = self._parse_column_def(this)

        if type_required and (
            (isinstance(this, exp.Column) and this.this is column_def) or this is column_def
        ):
            # Parsing the field as "name [type]" consumed nothing extra, so the
            # single token must itself be the type.
            self._retreat(index)
            return self._parse_types()

        return column_def

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AtTimeZone if an AT TIME ZONE clause follows."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference and any trailing operators (dots, casts, brackets)."""
        this = self._parse_column_reference()
        return self._parse_column_ops(this) if this else self._parse_bracket(this)

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        """Parse a field and promote a lone Identifier to a Column node."""
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            # VALUES not followed by "(" is an ordinary identifier in this dialect.
            this = self._parse_id_var()

        return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Repeatedly apply column operators (::, ., etc.) and bracket subscripts
        to `this`, folding dotted paths into Column/Dot nodes."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func) and this:
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the qualifier chain: a.b.c -> Column(catalog=a, db=b, ..., this=c).
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, an implicitly concatenated string
        run, a leading-dot number (.5), or a parenthesized expression/subquery."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate: 'a' 'b' -> CONCAT('a', 'b').
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a primary, a function call, or an identifier — in that priority order."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function invocation.

        Args:
            functions: name -> builder overrides (defaults to self.FUNCTIONS).
            anonymous: force an exp.Anonymous node, skipping known-function builders.
            optional_parens: allow parenless functions (e.g. CURRENT_DATE).
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...) / ANY(WITH ... SELECT ...).
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                # Builders that declare a `dialect` parameter get it passed through.
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Preserve the original (un-normalized) spelling of the name.
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Normalize key-value style arguments (aliases, equalities) into
        exp.PropertyEQ nodes; other expressions pass through unchanged."""
        transformed = []

        for e in expressions:
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    # The key is an identifier, not a column reference — unwrap it.
                    e.this.replace(e.this.this)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter (name plus optional type/constraints) of a UDF signature."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly dotted UDF name and, if present, its parenthesized
        parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'abc'); a bare introducer token
        falls back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda ((x, y) -> expr or x -> expr); if no lambda arrow follows,
        rewind and parse an aggregate-argument expression (DISTINCT, ORDER BY,
        LIMIT, IGNORE/RESPECT NULLS all allowed inside function parens)."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda — rewind and parse as a regular argument expression.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/constraint list) attached to `this`.
        First speculatively tries a nested SELECT, in which case `this` is returned
        untouched (the caller will parse the query itself)."""
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                # Whether or not the speculative parse succeeded, discard its
                # errors and rewind — the real parse happens at the call site.
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse a single schema field as a column definition."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the type, computed/transform expression and constraints that follow
        a column name; returns `this` unchanged when nothing follows it."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT-style options; with a (start, increment) pair it
        becomes a generated-identity constraint, otherwise a plain auto-increment."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        """Parse AUTO REFRESH <value>; rewinds past the already-consumed AUTO token
        when REFRESH does not follow."""
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS constraint, with either a parenthesized value list or a
        single expression."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY (...) | ROW ... | (expr)}.
        `this` distinguishes ALWAYS (True) from BY DEFAULT (False)."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            # GENERATED ... AS ROW {START | END} [HIDDEN] (system-versioning columns).
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (expr): the parens held a computed expression.
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Shorthand IDENTITY(start, increment).
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse INLINE [LENGTH] <expr> (inline length column constraint)."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the constraint that follows a NOT keyword (NULL / CASESPECIFIC /
        FOR REPLICATION); returns None when none matches."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named) column constraint via CONSTRAINT_PARSERS."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table constraint: either unnamed, or CONSTRAINT <name> followed
        by one or more constraint bodies."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        """Collect consecutive unnamed constraints (or constraint-like function
        calls, e.g. CHECK(...)) until none match."""
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one unnamed constraint drawn from `constraints` (defaults to all
        of CONSTRAINT_PARSERS); quoted identifiers never start a constraint."""
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] [(columns)] [USING <index type>] [ON CONFLICT ...]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint option strings (ON <event> <action>,
        NOT ENFORCED, DEFERRABLE, ...) until an unrecognized token is reached."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause; the referenced columns are captured by the
        schema=True table parse, so `expressions` is intentionally left as None."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
4692 4693 def _parse_foreign_key(self) -> exp.ForeignKey: 4694 expressions = self._parse_wrapped_id_vars() 4695 reference = self._parse_references() 4696 options = {} 4697 4698 while self._match(TokenType.ON): 4699 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4700 self.raise_error("Expected DELETE or UPDATE") 4701 4702 kind = self._prev.text.lower() 4703 4704 if self._match_text_seq("NO", "ACTION"): 4705 action = "NO ACTION" 4706 elif self._match(TokenType.SET): 4707 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4708 action = "SET " + self._prev.text.upper() 4709 else: 4710 self._advance() 4711 action = self._prev.text.upper() 4712 4713 options[kind] = action 4714 4715 return self.expression( 4716 exp.ForeignKey, 4717 expressions=expressions, 4718 reference=reference, 4719 **options, # type: ignore 4720 ) 4721 4722 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4723 return self._parse_field() 4724 4725 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4726 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4727 self._retreat(self._index - 1) 4728 return None 4729 4730 id_vars = self._parse_wrapped_id_vars() 4731 return self.expression( 4732 exp.PeriodForSystemTimeConstraint, 4733 this=seq_get(id_vars, 0), 4734 expression=seq_get(id_vars, 1), 4735 ) 4736 4737 def _parse_primary_key( 4738 self, wrapped_optional: bool = False, in_props: bool = False 4739 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4740 desc = ( 4741 self._match_set((TokenType.ASC, TokenType.DESC)) 4742 and self._prev.token_type == TokenType.DESC 4743 ) 4744 4745 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4746 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4747 4748 expressions = self._parse_wrapped_csv( 4749 self._parse_primary_key_part, optional=wrapped_optional 4750 ) 4751 options = self._parse_key_constraint_options() 4752 return self.expression(exp.PrimaryKey, 
expressions=expressions, options=options) 4753 4754 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4755 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4756 4757 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4758 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4759 return this 4760 4761 bracket_kind = self._prev.token_type 4762 expressions = self._parse_csv( 4763 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4764 ) 4765 4766 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 4767 self.raise_error("Expected ]") 4768 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 4769 self.raise_error("Expected }") 4770 4771 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4772 if bracket_kind == TokenType.L_BRACE: 4773 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 4774 elif not this or this.name.upper() == "ARRAY": 4775 this = self.expression(exp.Array, expressions=expressions) 4776 else: 4777 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4778 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4779 4780 self._add_comments(this) 4781 return self._parse_bracket(this) 4782 4783 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4784 if self._match(TokenType.COLON): 4785 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4786 return this 4787 4788 def _parse_case(self) -> t.Optional[exp.Expression]: 4789 ifs = [] 4790 default = None 4791 4792 comments = self._prev_comments 4793 expression = self._parse_conjunction() 4794 4795 while self._match(TokenType.WHEN): 4796 this = self._parse_conjunction() 4797 self._match(TokenType.THEN) 4798 then = self._parse_conjunction() 4799 
ifs.append(self.expression(exp.If, this=this, true=then)) 4800 4801 if self._match(TokenType.ELSE): 4802 default = self._parse_conjunction() 4803 4804 if not self._match(TokenType.END): 4805 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4806 default = exp.column("interval") 4807 else: 4808 self.raise_error("Expected END after CASE", self._prev) 4809 4810 return self.expression( 4811 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 4812 ) 4813 4814 def _parse_if(self) -> t.Optional[exp.Expression]: 4815 if self._match(TokenType.L_PAREN): 4816 args = self._parse_csv(self._parse_conjunction) 4817 this = self.validate_expression(exp.If.from_arg_list(args), args) 4818 self._match_r_paren() 4819 else: 4820 index = self._index - 1 4821 4822 if self.NO_PAREN_IF_COMMANDS and index == 0: 4823 return self._parse_as_command(self._prev) 4824 4825 condition = self._parse_conjunction() 4826 4827 if not condition: 4828 self._retreat(index) 4829 return None 4830 4831 self._match(TokenType.THEN) 4832 true = self._parse_conjunction() 4833 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4834 self._match(TokenType.END) 4835 this = self.expression(exp.If, this=condition, true=true, false=false) 4836 4837 return this 4838 4839 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4840 if not self._match_text_seq("VALUE", "FOR"): 4841 self._retreat(self._index - 1) 4842 return None 4843 4844 return self.expression( 4845 exp.NextValueFor, 4846 this=self._parse_column(), 4847 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4848 ) 4849 4850 def _parse_extract(self) -> exp.Extract: 4851 this = self._parse_function() or self._parse_var() or self._parse_type() 4852 4853 if self._match(TokenType.FROM): 4854 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4855 4856 if not self._match(TokenType.COMMA): 4857 self.raise_error("Expected FROM or comma 
after EXTRACT", self._prev) 4858 4859 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4860 4861 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4862 this = self._parse_conjunction() 4863 4864 if not self._match(TokenType.ALIAS): 4865 if self._match(TokenType.COMMA): 4866 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4867 4868 self.raise_error("Expected AS after CAST") 4869 4870 fmt = None 4871 to = self._parse_types() 4872 4873 if self._match(TokenType.FORMAT): 4874 fmt_string = self._parse_string() 4875 fmt = self._parse_at_time_zone(fmt_string) 4876 4877 if not to: 4878 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4879 if to.this in exp.DataType.TEMPORAL_TYPES: 4880 this = self.expression( 4881 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4882 this=this, 4883 format=exp.Literal.string( 4884 format_time( 4885 fmt_string.this if fmt_string else "", 4886 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 4887 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 4888 ) 4889 ), 4890 ) 4891 4892 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4893 this.set("zone", fmt.args["zone"]) 4894 return this 4895 elif not to: 4896 self.raise_error("Expected TYPE after CAST") 4897 elif isinstance(to, exp.Identifier): 4898 to = exp.DataType.build(to.name, udt=True) 4899 elif to.this == exp.DataType.Type.CHAR: 4900 if self._match(TokenType.CHARACTER_SET): 4901 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4902 4903 return self.expression( 4904 exp.Cast if strict else exp.TryCast, 4905 this=this, 4906 to=to, 4907 format=fmt, 4908 safe=safe, 4909 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 4910 ) 4911 4912 def _parse_string_agg(self) -> exp.Expression: 4913 if self._match(TokenType.DISTINCT): 4914 args: t.List[t.Optional[exp.Expression]] = [ 4915 
self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse the arguments of a CONVERT call into a Cast/TryCast node.

        Handles both `CONVERT(expr USING charset)` and `CONVERT(expr, type)`.

        Args:
            strict: when True build exp.Cast, otherwise exp.TryCast.
            safe: forwarded as the Cast node's `safe` arg (dialect-specific).
        """
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            # CONVERT(expr USING charset) -- the target is a character set, not a type
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            # CONVERT(expr, type)
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        # Fewer than 3 arguments can only be the (bin, charset) variant
        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk the (search, result) pairs; a trailing unpaired arg is the default
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # DECODE treats NULL as matching NULL, unlike plain equality
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: it may or may not be NULL at runtime,
                # so test equality OR both sides being NULL
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        # Parses one JSON_OBJECT argument: [KEY] <key> {VALUE | separator} <value>
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Wraps `this` in FormatJson when it is followed by a FORMAT JSON clause
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e.
NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        """Parse JSON_OBJECT(...) / JSON_OBJECTAGG(...) arguments and trailing modifiers."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        # [NULL | ABSENT] ON NULL
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        # [WITH | WITHOUT] UNIQUE [KEYS]
        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            # NESTED [PATH] ... COLUMNS (...) carries no name/type of its own
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        # COLUMNS (<json_column_def>, ...)
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        # JSON_TABLE(<expr> [FORMAT JSON] [, <path>] [... ON ERROR] [... ON EMPTY] COLUMNS (...))
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        # MySQL full-text search: MATCH (col, ...) AGAINST ('expr' [modifier])
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # <name> <type> [<path>] [AS JSON]
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            # OPENJSON(...) WITH (<column defs>)
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/STRPOS-style arguments; `haystack_first` flips the arg order."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # ANSI form: POSITION(needle IN haystack)
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        # e.g. BigQuery ML: PREDICT(MODEL <model>, TABLE <table> [, <params struct>])
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return
self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        # Optional LEADING | TRAILING | BOTH (dialect-defined TRIM_TYPES)
        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(<chars> FROM <target>) lists the removal characters first,
            # TRIM(<target>, <chars>) lists the target first
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        # WINDOW <name> AS (...) [, ...]
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        # Wraps `this` when it is followed by IGNORE NULLS / RESPECT NULLS
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # HAVING {MAX | MIN} <column> inside an aggregate call
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the clauses that may follow a function call -- FILTER (...),
        WITHIN GROUP (...), IGNORE/RESPECT NULLS and OVER (...) -- wrapping `this`
        accordingly. With `alias=True`, parses a named window (name AS (...))."""
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Hoist the IGNORE/RESPECT NULLS wrapper out of the aggregate's
                # arguments so that it wraps the aggregate itself
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # No parenthesis after OVER: a reference to a named window
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame spec: {ROWS | RANGE} [BETWEEN] <start> [AND <end>]
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        # One side of a frame spec, e.g. UNBOUNDED PRECEDING or 5 FOLLOWING
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an alias after `this`; with `explicit=True` the AS token is required."""
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            # Multiple aliases, e.g. `expr AS (a, b, c)`
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.comments
                column.comments = None

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        identifier = self._parse_identifier()

        if identifier:
            return
identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        # Consumes a string token and turns it into a quoted identifier
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a Var from a VAR token, any non-reserved token (any_token=True),
        or one of `tokens`; falls back to a placeholder."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        # Advances past any token that is not reserved, returning it (else None)
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_primary_or_var(self) ->
t.Optional[exp.Expression]: 5463 return self._parse_primary() or self._parse_var(any_token=True) 5464 5465 def _parse_null(self) -> t.Optional[exp.Expression]: 5466 if self._match_set(self.NULL_TOKENS): 5467 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5468 return self._parse_placeholder() 5469 5470 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5471 if self._match(TokenType.TRUE): 5472 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5473 if self._match(TokenType.FALSE): 5474 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5475 return self._parse_placeholder() 5476 5477 def _parse_star(self) -> t.Optional[exp.Expression]: 5478 if self._match(TokenType.STAR): 5479 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5480 return self._parse_placeholder() 5481 5482 def _parse_parameter(self) -> exp.Parameter: 5483 self._match(TokenType.L_BRACE) 5484 this = self._parse_identifier() or self._parse_primary_or_var() 5485 expression = self._match(TokenType.COLON) and ( 5486 self._parse_identifier() or self._parse_primary_or_var() 5487 ) 5488 self._match(TokenType.R_BRACE) 5489 return self.expression(exp.Parameter, this=this, expression=expression) 5490 5491 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5492 if self._match_set(self.PLACEHOLDER_PARSERS): 5493 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5494 if placeholder: 5495 return placeholder 5496 self._advance(-1) 5497 return None 5498 5499 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5500 if not self._match(TokenType.EXCEPT): 5501 return None 5502 if self._match(TokenType.L_PAREN, advance=False): 5503 return self._parse_wrapped_csv(self._parse_column) 5504 5505 except_column = self._parse_column() 5506 return [except_column] if except_column else None 5507 5508 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5509 if not self._match(TokenType.REPLACE): 5510 return None 5511 if 
self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list of expressions using `parse_method`."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach the separator's pending comments to the preceding item
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        # Left-associative fold: parse operands via `parse_method` and combine them
        # with the binary expression type mapped to each matched operator token
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        # Parenthesized CSV list; the parentheses are required unless `optional`
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside (optionally) required parentheses."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) ->
t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        # SELECT as it appears inside DDL, e.g. CREATE TABLE ... AS <select>
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [kind] [TRANSACTION | WORK] with optional comma-separated modes."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            # A mode may span several VAR tokens, e.g. ISOLATION LEVEL ...
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TRANSACTION | WORK] [TO [SAVEPOINT] x] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            # AND [NO] CHAIN
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST | AFTER <col>]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        # Default the drop kind to COLUMN when _parse_drop didn't set one
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            # Single ADD followed by a list of column definitions
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse ALTER [COLUMN] <col> {DROP DEFAULT | SET DEFAULT | COMMENT | [SET DATA] TYPE ...}."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            # RENAME COLUMN [IF EXISTS] <old> TO <new>
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        start = self._prev

        # Only ALTER TABLE is parsed natively; anything else falls back to a Command
        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            options =
self._parse_csv(self._parse_property)

            # Only build an AlterTable node when all tokens were consumed
            if not self._curr and actions:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE [INTO] <target> USING <source> ON <condition> WHEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse MERGE's WHEN [NOT] MATCHED [BY TARGET | BY SOURCE] [AND cond] THEN ... clauses."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT *
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE *
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        # Dialect-specific SHOW statements; unknown ones fall back to a Command
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            # Not an assignment after all -- rewind so the caller can try elsewhere
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            # Leftover tokens: re-parse the whole statement as an opaque Command
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        """Parse a (possibly multi-word) option into a Var.

        Args:
            options: maps a leading keyword to its allowed keyword continuations.
            raise_unmatched: when True, raise on an unknown continuation; otherwise
                retreat and return None.
        """
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            # No continuation matched (loop fell through without break)
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        # Consume the remainder of the statement verbatim into a Command node
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property of the form <this>(<kind> [(key value, ...)])."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a RANGE(MIN <x> MAX <y>) dictionary property."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
if has_min: 5937 min = self._parse_var() or self._parse_primary() 5938 self._match_text_seq("MAX") 5939 max = self._parse_var() or self._parse_primary() 5940 else: 5941 max = self._parse_var() or self._parse_primary() 5942 min = exp.Literal.number(0) 5943 self._match_r_paren() 5944 return self.expression(exp.DictRange, this=this, min=min, max=max) 5945 5946 def _parse_comprehension( 5947 self, this: t.Optional[exp.Expression] 5948 ) -> t.Optional[exp.Comprehension]: 5949 index = self._index 5950 expression = self._parse_column() 5951 if not self._match(TokenType.IN): 5952 self._retreat(index - 1) 5953 return None 5954 iterator = self._parse_column() 5955 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5956 return self.expression( 5957 exp.Comprehension, 5958 this=this, 5959 expression=expression, 5960 iterator=iterator, 5961 condition=condition, 5962 ) 5963 5964 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 5965 if self._match(TokenType.HEREDOC_STRING): 5966 return self.expression(exp.Heredoc, this=self._prev.text) 5967 5968 if not self._match_text_seq("$"): 5969 return None 5970 5971 tags = ["$"] 5972 tag_text = None 5973 5974 if self._is_connected(): 5975 self._advance() 5976 tags.append(self._prev.text.upper()) 5977 else: 5978 self.raise_error("No closing $ found") 5979 5980 if tags[-1] != "$": 5981 if self._is_connected() and self._match_text_seq("$"): 5982 tag_text = tags[-1] 5983 tags.append("$") 5984 else: 5985 self.raise_error("No closing $ found") 5986 5987 heredoc_start = self._curr 5988 5989 while self._curr: 5990 if self._match_text_seq(*tags, advance=False): 5991 this = self._find_sql(heredoc_start, self._prev) 5992 self._advance(len(tags)) 5993 return self.expression(exp.Heredoc, this=this, tag=tag_text) 5994 5995 self._advance() 5996 5997 self.raise_error(f"No closing {''.join(tags)} found") 5998 return None 5999 6000 def _find_parser( 6001 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6002 ) -> 
t.Optional[t.Callable]: 6003 if not self._curr: 6004 return None 6005 6006 index = self._index 6007 this = [] 6008 while True: 6009 # The current token might be multiple words 6010 curr = self._curr.text.upper() 6011 key = curr.split(" ") 6012 this.append(curr) 6013 6014 self._advance() 6015 result, trie = in_trie(trie, key) 6016 if result == TrieResult.FAILED: 6017 break 6018 6019 if result == TrieResult.EXISTS: 6020 subparser = parsers[" ".join(this)] 6021 return subparser 6022 6023 self._retreat(index) 6024 return None 6025 6026 def _match(self, token_type, advance=True, expression=None): 6027 if not self._curr: 6028 return None 6029 6030 if self._curr.token_type == token_type: 6031 if advance: 6032 self._advance() 6033 self._add_comments(expression) 6034 return True 6035 6036 return None 6037 6038 def _match_set(self, types, advance=True): 6039 if not self._curr: 6040 return None 6041 6042 if self._curr.token_type in types: 6043 if advance: 6044 self._advance() 6045 return True 6046 6047 return None 6048 6049 def _match_pair(self, token_type_a, token_type_b, advance=True): 6050 if not self._curr or not self._next: 6051 return None 6052 6053 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6054 if advance: 6055 self._advance(2) 6056 return True 6057 6058 return None 6059 6060 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6061 if not self._match(TokenType.L_PAREN, expression=expression): 6062 self.raise_error("Expecting (") 6063 6064 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6065 if not self._match(TokenType.R_PAREN, expression=expression): 6066 self.raise_error("Expecting )") 6067 6068 def _match_texts(self, texts, advance=True): 6069 if self._curr and self._curr.text.upper() in texts: 6070 if advance: 6071 self._advance() 6072 return True 6073 return None 6074 6075 def _match_text_seq(self, *texts, advance=True): 6076 index = self._index 6077 for text in 
texts: 6078 if self._curr and self._curr.text.upper() == text: 6079 self._advance() 6080 else: 6081 self._retreat(index) 6082 return None 6083 6084 if not advance: 6085 self._retreat(index) 6086 6087 return True 6088 6089 def _replace_lambda( 6090 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6091 ) -> t.Optional[exp.Expression]: 6092 if not node: 6093 return node 6094 6095 for column in node.find_all(exp.Column): 6096 if column.parts[0].name in lambda_variables: 6097 dot_or_id = column.to_dot() if column.table else column.this 6098 parent = column.parent 6099 6100 while isinstance(parent, exp.Dot): 6101 if not isinstance(parent.parent, exp.Dot): 6102 parent.replace(dot_or_id) 6103 break 6104 parent = parent.parent 6105 else: 6106 if column is node: 6107 node = dot_or_id 6108 else: 6109 column.replace(dot_or_id) 6110 return node 6111 6112 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6113 start = self._prev 6114 6115 # Not to be confused with TRUNCATE(number, decimals) function call 6116 if self._match(TokenType.L_PAREN): 6117 self._retreat(self._index - 2) 6118 return self._parse_function() 6119 6120 # Clickhouse supports TRUNCATE DATABASE as well 6121 is_database = self._match(TokenType.DATABASE) 6122 6123 self._match(TokenType.TABLE) 6124 6125 exists = self._parse_exists(not_=False) 6126 6127 expressions = self._parse_csv( 6128 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6129 ) 6130 6131 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6132 6133 if self._match_text_seq("RESTART", "IDENTITY"): 6134 identity = "RESTART" 6135 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6136 identity = "CONTINUE" 6137 else: 6138 identity = None 6139 6140 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6141 option = self._prev.text 6142 else: 6143 option = None 6144 6145 partition = self._parse_partition() 6146 6147 # Fallback case 6148 if 
self._curr: 6149 return self._parse_as_command(start) 6150 6151 return self.expression( 6152 exp.TruncateTable, 6153 expressions=expressions, 6154 is_database=is_database, 6155 exists=exists, 6156 cluster=cluster, 6157 identity=identity, 6158 option=option, 6159 partition=partition, 6160 ) 6161 6162 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6163 this = self._parse_ordered(self._parse_opclass) 6164 6165 if not self._match(TokenType.WITH): 6166 return this 6167 6168 op = self._parse_var(any_token=True) 6169 6170 return self.expression(exp.WithOperator, this=this, op=op)
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a StarMap when the sole argument is a star, otherwise a VarMap
    from the alternating key/value arguments."""
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    index = 0
    while index < len(args):
        # Arguments alternate: key at even positions, value at odd positions.
        keys.append(args[index])
        values.append(args[index + 1])
        index += 2

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a logarithm expression, honoring the dialect's argument order and
    its semantics for single-argument LOG."""
    base = seq_get(args, 0)
    value = seq_get(args, 1)

    if value:
        # Arguments default to (base, expression); some dialects flip them.
        if dialect.LOG_BASE_FIRST:
            return exp.Log(this=base, expression=value)
        return exp.Log(this=value, expression=base)

    # Single-argument LOG means natural log in some dialects.
    if dialect.parser_class.LOG_DEFAULTS_TO_LN:
        return exp.Ln(this=base)
    return exp.Log(this=base)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return a builder that constructs an `expr_type` node from a value and a
    JSON path, normalizing the path through the dialect."""

    def _builder(args: t.List, dialect: Dialect) -> E:
        this = seq_get(args, 0)
        path = dialect.to_json_path(seq_get(args, 1))
        node = expr_type(this=this, expression=path)

        # Only JSON_EXTRACT accepts additional trailing path arguments.
        if expr_type is exp.JSONExtract and len(args) > 2:
            node.set("expressions", args[2:])

        return node

    return _builder
86class Parser(metaclass=_Parser): 87 """ 88 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 89 90 Args: 91 error_level: The desired error level. 92 Default: ErrorLevel.IMMEDIATE 93 error_message_context: The amount of context to capture from a query string when displaying 94 the error message (in number of characters). 95 Default: 100 96 max_errors: Maximum number of error messages to include in a raised ParseError. 97 This is only relevant if error_level is ErrorLevel.RAISE. 98 Default: 3 99 """ 100 101 FUNCTIONS: t.Dict[str, t.Callable] = { 102 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 103 "CONCAT": lambda args, dialect: exp.Concat( 104 expressions=args, 105 safe=not dialect.STRICT_STRING_CONCAT, 106 coalesce=dialect.CONCAT_COALESCE, 107 ), 108 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 109 expressions=args, 110 safe=not dialect.STRICT_STRING_CONCAT, 111 coalesce=dialect.CONCAT_COALESCE, 112 ), 113 "DATE_TO_DATE_STR": lambda args: exp.Cast( 114 this=seq_get(args, 0), 115 to=exp.DataType(this=exp.DataType.Type.TEXT), 116 ), 117 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 118 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 119 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 120 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 121 "LIKE": build_like, 122 "LOG": build_logarithm, 123 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 124 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 125 "MOD": lambda args: exp.Mod(this=seq_get(args, 0), expression=seq_get(args, 1)), 126 "TIME_TO_TIME_STR": lambda args: exp.Cast( 127 this=seq_get(args, 0), 128 to=exp.DataType(this=exp.DataType.Type.TEXT), 129 ), 130 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 131 this=exp.Cast( 132 this=seq_get(args, 0), 
133 to=exp.DataType(this=exp.DataType.Type.TEXT), 134 ), 135 start=exp.Literal.number(1), 136 length=exp.Literal.number(10), 137 ), 138 "VAR_MAP": build_var_map, 139 } 140 141 NO_PAREN_FUNCTIONS = { 142 TokenType.CURRENT_DATE: exp.CurrentDate, 143 TokenType.CURRENT_DATETIME: exp.CurrentDate, 144 TokenType.CURRENT_TIME: exp.CurrentTime, 145 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 146 TokenType.CURRENT_USER: exp.CurrentUser, 147 } 148 149 STRUCT_TYPE_TOKENS = { 150 TokenType.NESTED, 151 TokenType.OBJECT, 152 TokenType.STRUCT, 153 } 154 155 NESTED_TYPE_TOKENS = { 156 TokenType.ARRAY, 157 TokenType.LOWCARDINALITY, 158 TokenType.MAP, 159 TokenType.NULLABLE, 160 *STRUCT_TYPE_TOKENS, 161 } 162 163 ENUM_TYPE_TOKENS = { 164 TokenType.ENUM, 165 TokenType.ENUM8, 166 TokenType.ENUM16, 167 } 168 169 AGGREGATE_TYPE_TOKENS = { 170 TokenType.AGGREGATEFUNCTION, 171 TokenType.SIMPLEAGGREGATEFUNCTION, 172 } 173 174 TYPE_TOKENS = { 175 TokenType.BIT, 176 TokenType.BOOLEAN, 177 TokenType.TINYINT, 178 TokenType.UTINYINT, 179 TokenType.SMALLINT, 180 TokenType.USMALLINT, 181 TokenType.INT, 182 TokenType.UINT, 183 TokenType.BIGINT, 184 TokenType.UBIGINT, 185 TokenType.INT128, 186 TokenType.UINT128, 187 TokenType.INT256, 188 TokenType.UINT256, 189 TokenType.MEDIUMINT, 190 TokenType.UMEDIUMINT, 191 TokenType.FIXEDSTRING, 192 TokenType.FLOAT, 193 TokenType.DOUBLE, 194 TokenType.CHAR, 195 TokenType.NCHAR, 196 TokenType.VARCHAR, 197 TokenType.NVARCHAR, 198 TokenType.BPCHAR, 199 TokenType.TEXT, 200 TokenType.MEDIUMTEXT, 201 TokenType.LONGTEXT, 202 TokenType.MEDIUMBLOB, 203 TokenType.LONGBLOB, 204 TokenType.BINARY, 205 TokenType.VARBINARY, 206 TokenType.JSON, 207 TokenType.JSONB, 208 TokenType.INTERVAL, 209 TokenType.TINYBLOB, 210 TokenType.TINYTEXT, 211 TokenType.TIME, 212 TokenType.TIMETZ, 213 TokenType.TIMESTAMP, 214 TokenType.TIMESTAMP_S, 215 TokenType.TIMESTAMP_MS, 216 TokenType.TIMESTAMP_NS, 217 TokenType.TIMESTAMPTZ, 218 TokenType.TIMESTAMPLTZ, 219 TokenType.DATETIME, 220 
TokenType.DATETIME64, 221 TokenType.DATE, 222 TokenType.DATE32, 223 TokenType.INT4RANGE, 224 TokenType.INT4MULTIRANGE, 225 TokenType.INT8RANGE, 226 TokenType.INT8MULTIRANGE, 227 TokenType.NUMRANGE, 228 TokenType.NUMMULTIRANGE, 229 TokenType.TSRANGE, 230 TokenType.TSMULTIRANGE, 231 TokenType.TSTZRANGE, 232 TokenType.TSTZMULTIRANGE, 233 TokenType.DATERANGE, 234 TokenType.DATEMULTIRANGE, 235 TokenType.DECIMAL, 236 TokenType.UDECIMAL, 237 TokenType.BIGDECIMAL, 238 TokenType.UUID, 239 TokenType.GEOGRAPHY, 240 TokenType.GEOMETRY, 241 TokenType.HLLSKETCH, 242 TokenType.HSTORE, 243 TokenType.PSEUDO_TYPE, 244 TokenType.SUPER, 245 TokenType.SERIAL, 246 TokenType.SMALLSERIAL, 247 TokenType.BIGSERIAL, 248 TokenType.XML, 249 TokenType.YEAR, 250 TokenType.UNIQUEIDENTIFIER, 251 TokenType.USERDEFINED, 252 TokenType.MONEY, 253 TokenType.SMALLMONEY, 254 TokenType.ROWVERSION, 255 TokenType.IMAGE, 256 TokenType.VARIANT, 257 TokenType.OBJECT, 258 TokenType.OBJECT_IDENTIFIER, 259 TokenType.INET, 260 TokenType.IPADDRESS, 261 TokenType.IPPREFIX, 262 TokenType.IPV4, 263 TokenType.IPV6, 264 TokenType.UNKNOWN, 265 TokenType.NULL, 266 TokenType.NAME, 267 *ENUM_TYPE_TOKENS, 268 *NESTED_TYPE_TOKENS, 269 *AGGREGATE_TYPE_TOKENS, 270 } 271 272 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 273 TokenType.BIGINT: TokenType.UBIGINT, 274 TokenType.INT: TokenType.UINT, 275 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 276 TokenType.SMALLINT: TokenType.USMALLINT, 277 TokenType.TINYINT: TokenType.UTINYINT, 278 TokenType.DECIMAL: TokenType.UDECIMAL, 279 } 280 281 SUBQUERY_PREDICATES = { 282 TokenType.ANY: exp.Any, 283 TokenType.ALL: exp.All, 284 TokenType.EXISTS: exp.Exists, 285 TokenType.SOME: exp.Any, 286 } 287 288 RESERVED_TOKENS = { 289 *Tokenizer.SINGLE_TOKENS.values(), 290 TokenType.SELECT, 291 } 292 293 DB_CREATABLES = { 294 TokenType.DATABASE, 295 TokenType.SCHEMA, 296 TokenType.TABLE, 297 TokenType.VIEW, 298 TokenType.MODEL, 299 TokenType.DICTIONARY, 300 TokenType.SEQUENCE, 301 TokenType.STORAGE_INTEGRATION, 302 
} 303 304 CREATABLES = { 305 TokenType.COLUMN, 306 TokenType.CONSTRAINT, 307 TokenType.FUNCTION, 308 TokenType.INDEX, 309 TokenType.PROCEDURE, 310 TokenType.FOREIGN_KEY, 311 *DB_CREATABLES, 312 } 313 314 # Tokens that can represent identifiers 315 ID_VAR_TOKENS = { 316 TokenType.VAR, 317 TokenType.ANTI, 318 TokenType.APPLY, 319 TokenType.ASC, 320 TokenType.ASOF, 321 TokenType.AUTO_INCREMENT, 322 TokenType.BEGIN, 323 TokenType.BPCHAR, 324 TokenType.CACHE, 325 TokenType.CASE, 326 TokenType.COLLATE, 327 TokenType.COMMAND, 328 TokenType.COMMENT, 329 TokenType.COMMIT, 330 TokenType.CONSTRAINT, 331 TokenType.DEFAULT, 332 TokenType.DELETE, 333 TokenType.DESC, 334 TokenType.DESCRIBE, 335 TokenType.DICTIONARY, 336 TokenType.DIV, 337 TokenType.END, 338 TokenType.EXECUTE, 339 TokenType.ESCAPE, 340 TokenType.FALSE, 341 TokenType.FIRST, 342 TokenType.FILTER, 343 TokenType.FINAL, 344 TokenType.FORMAT, 345 TokenType.FULL, 346 TokenType.IS, 347 TokenType.ISNULL, 348 TokenType.INTERVAL, 349 TokenType.KEEP, 350 TokenType.KILL, 351 TokenType.LEFT, 352 TokenType.LOAD, 353 TokenType.MERGE, 354 TokenType.NATURAL, 355 TokenType.NEXT, 356 TokenType.OFFSET, 357 TokenType.OPERATOR, 358 TokenType.ORDINALITY, 359 TokenType.OVERLAPS, 360 TokenType.OVERWRITE, 361 TokenType.PARTITION, 362 TokenType.PERCENT, 363 TokenType.PIVOT, 364 TokenType.PRAGMA, 365 TokenType.RANGE, 366 TokenType.RECURSIVE, 367 TokenType.REFERENCES, 368 TokenType.REFRESH, 369 TokenType.REPLACE, 370 TokenType.RIGHT, 371 TokenType.ROW, 372 TokenType.ROWS, 373 TokenType.SEMI, 374 TokenType.SET, 375 TokenType.SETTINGS, 376 TokenType.SHOW, 377 TokenType.TEMPORARY, 378 TokenType.TOP, 379 TokenType.TRUE, 380 TokenType.TRUNCATE, 381 TokenType.UNIQUE, 382 TokenType.UNPIVOT, 383 TokenType.UPDATE, 384 TokenType.USE, 385 TokenType.VOLATILE, 386 TokenType.WINDOW, 387 *CREATABLES, 388 *SUBQUERY_PREDICATES, 389 *TYPE_TOKENS, 390 *NO_PAREN_FUNCTIONS, 391 } 392 393 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 394 395 TABLE_ALIAS_TOKENS = 
ID_VAR_TOKENS - { 396 TokenType.ANTI, 397 TokenType.APPLY, 398 TokenType.ASOF, 399 TokenType.FULL, 400 TokenType.LEFT, 401 TokenType.LOCK, 402 TokenType.NATURAL, 403 TokenType.OFFSET, 404 TokenType.RIGHT, 405 TokenType.SEMI, 406 TokenType.WINDOW, 407 } 408 409 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 410 411 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 412 413 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 414 415 FUNC_TOKENS = { 416 TokenType.COLLATE, 417 TokenType.COMMAND, 418 TokenType.CURRENT_DATE, 419 TokenType.CURRENT_DATETIME, 420 TokenType.CURRENT_TIMESTAMP, 421 TokenType.CURRENT_TIME, 422 TokenType.CURRENT_USER, 423 TokenType.FILTER, 424 TokenType.FIRST, 425 TokenType.FORMAT, 426 TokenType.GLOB, 427 TokenType.IDENTIFIER, 428 TokenType.INDEX, 429 TokenType.ISNULL, 430 TokenType.ILIKE, 431 TokenType.INSERT, 432 TokenType.LIKE, 433 TokenType.MERGE, 434 TokenType.OFFSET, 435 TokenType.PRIMARY_KEY, 436 TokenType.RANGE, 437 TokenType.REPLACE, 438 TokenType.RLIKE, 439 TokenType.ROW, 440 TokenType.UNNEST, 441 TokenType.VAR, 442 TokenType.LEFT, 443 TokenType.RIGHT, 444 TokenType.SEQUENCE, 445 TokenType.DATE, 446 TokenType.DATETIME, 447 TokenType.TABLE, 448 TokenType.TIMESTAMP, 449 TokenType.TIMESTAMPTZ, 450 TokenType.TRUNCATE, 451 TokenType.WINDOW, 452 TokenType.XOR, 453 *TYPE_TOKENS, 454 *SUBQUERY_PREDICATES, 455 } 456 457 CONJUNCTION = { 458 TokenType.AND: exp.And, 459 TokenType.OR: exp.Or, 460 } 461 462 EQUALITY = { 463 TokenType.COLON_EQ: exp.PropertyEQ, 464 TokenType.EQ: exp.EQ, 465 TokenType.NEQ: exp.NEQ, 466 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 467 } 468 469 COMPARISON = { 470 TokenType.GT: exp.GT, 471 TokenType.GTE: exp.GTE, 472 TokenType.LT: exp.LT, 473 TokenType.LTE: exp.LTE, 474 } 475 476 BITWISE = { 477 TokenType.AMP: exp.BitwiseAnd, 478 TokenType.CARET: exp.BitwiseXor, 479 TokenType.PIPE: exp.BitwiseOr, 480 } 481 482 TERM = { 483 TokenType.DASH: exp.Sub, 484 TokenType.PLUS: exp.Add, 485 TokenType.MOD: exp.Mod, 486 
TokenType.COLLATE: exp.Collate, 487 } 488 489 FACTOR = { 490 TokenType.DIV: exp.IntDiv, 491 TokenType.LR_ARROW: exp.Distance, 492 TokenType.SLASH: exp.Div, 493 TokenType.STAR: exp.Mul, 494 } 495 496 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 497 498 TIMES = { 499 TokenType.TIME, 500 TokenType.TIMETZ, 501 } 502 503 TIMESTAMPS = { 504 TokenType.TIMESTAMP, 505 TokenType.TIMESTAMPTZ, 506 TokenType.TIMESTAMPLTZ, 507 *TIMES, 508 } 509 510 SET_OPERATIONS = { 511 TokenType.UNION, 512 TokenType.INTERSECT, 513 TokenType.EXCEPT, 514 } 515 516 JOIN_METHODS = { 517 TokenType.ASOF, 518 TokenType.NATURAL, 519 TokenType.POSITIONAL, 520 } 521 522 JOIN_SIDES = { 523 TokenType.LEFT, 524 TokenType.RIGHT, 525 TokenType.FULL, 526 } 527 528 JOIN_KINDS = { 529 TokenType.INNER, 530 TokenType.OUTER, 531 TokenType.CROSS, 532 TokenType.SEMI, 533 TokenType.ANTI, 534 } 535 536 JOIN_HINTS: t.Set[str] = set() 537 538 LAMBDAS = { 539 TokenType.ARROW: lambda self, expressions: self.expression( 540 exp.Lambda, 541 this=self._replace_lambda( 542 self._parse_conjunction(), 543 {node.name for node in expressions}, 544 ), 545 expressions=expressions, 546 ), 547 TokenType.FARROW: lambda self, expressions: self.expression( 548 exp.Kwarg, 549 this=exp.var(expressions[0].name), 550 expression=self._parse_conjunction(), 551 ), 552 } 553 554 COLUMN_OPERATORS = { 555 TokenType.DOT: None, 556 TokenType.DCOLON: lambda self, this, to: self.expression( 557 exp.Cast if self.STRICT_CAST else exp.TryCast, 558 this=this, 559 to=to, 560 ), 561 TokenType.ARROW: lambda self, this, path: self.expression( 562 exp.JSONExtract, 563 this=this, 564 expression=self.dialect.to_json_path(path), 565 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 566 ), 567 TokenType.DARROW: lambda self, this, path: self.expression( 568 exp.JSONExtractScalar, 569 this=this, 570 expression=self.dialect.to_json_path(path), 571 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 572 ), 573 TokenType.HASH_ARROW: lambda self, this, 
path: self.expression( 574 exp.JSONBExtract, 575 this=this, 576 expression=path, 577 ), 578 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 579 exp.JSONBExtractScalar, 580 this=this, 581 expression=path, 582 ), 583 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 584 exp.JSONBContains, 585 this=this, 586 expression=key, 587 ), 588 } 589 590 EXPRESSION_PARSERS = { 591 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 592 exp.Column: lambda self: self._parse_column(), 593 exp.Condition: lambda self: self._parse_conjunction(), 594 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 595 exp.Expression: lambda self: self._parse_expression(), 596 exp.From: lambda self: self._parse_from(), 597 exp.Group: lambda self: self._parse_group(), 598 exp.Having: lambda self: self._parse_having(), 599 exp.Identifier: lambda self: self._parse_id_var(), 600 exp.Join: lambda self: self._parse_join(), 601 exp.Lambda: lambda self: self._parse_lambda(), 602 exp.Lateral: lambda self: self._parse_lateral(), 603 exp.Limit: lambda self: self._parse_limit(), 604 exp.Offset: lambda self: self._parse_offset(), 605 exp.Order: lambda self: self._parse_order(), 606 exp.Ordered: lambda self: self._parse_ordered(), 607 exp.Properties: lambda self: self._parse_properties(), 608 exp.Qualify: lambda self: self._parse_qualify(), 609 exp.Returning: lambda self: self._parse_returning(), 610 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 611 exp.Table: lambda self: self._parse_table_parts(), 612 exp.TableAlias: lambda self: self._parse_table_alias(), 613 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 614 exp.Where: lambda self: self._parse_where(), 615 exp.Window: lambda self: self._parse_named_window(), 616 exp.With: lambda self: self._parse_with(), 617 "JOIN_TYPE": lambda self: self._parse_join_parts(), 618 } 619 620 STATEMENT_PARSERS = { 621 TokenType.ALTER: lambda self: 
self._parse_alter(), 622 TokenType.BEGIN: lambda self: self._parse_transaction(), 623 TokenType.CACHE: lambda self: self._parse_cache(), 624 TokenType.COMMENT: lambda self: self._parse_comment(), 625 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 626 TokenType.CREATE: lambda self: self._parse_create(), 627 TokenType.DELETE: lambda self: self._parse_delete(), 628 TokenType.DESC: lambda self: self._parse_describe(), 629 TokenType.DESCRIBE: lambda self: self._parse_describe(), 630 TokenType.DROP: lambda self: self._parse_drop(), 631 TokenType.INSERT: lambda self: self._parse_insert(), 632 TokenType.KILL: lambda self: self._parse_kill(), 633 TokenType.LOAD: lambda self: self._parse_load(), 634 TokenType.MERGE: lambda self: self._parse_merge(), 635 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 636 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 637 TokenType.REFRESH: lambda self: self._parse_refresh(), 638 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 639 TokenType.SET: lambda self: self._parse_set(), 640 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 641 TokenType.UNCACHE: lambda self: self._parse_uncache(), 642 TokenType.UPDATE: lambda self: self._parse_update(), 643 TokenType.USE: lambda self: self.expression( 644 exp.Use, 645 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 646 this=self._parse_table(schema=False), 647 ), 648 } 649 650 UNARY_PARSERS = { 651 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 652 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 653 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 654 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 655 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 656 TokenType.DPIPE_SLASH: lambda self: 
self.expression(exp.Cbrt, this=self._parse_unary()), 657 } 658 659 STRING_PARSERS = { 660 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 661 exp.RawString, this=token.text 662 ), 663 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 664 exp.National, this=token.text 665 ), 666 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 667 TokenType.STRING: lambda self, token: self.expression( 668 exp.Literal, this=token.text, is_string=True 669 ), 670 TokenType.UNICODE_STRING: lambda self, token: self.expression( 671 exp.UnicodeString, 672 this=token.text, 673 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 674 ), 675 } 676 677 NUMERIC_PARSERS = { 678 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 679 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 680 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 681 TokenType.NUMBER: lambda self, token: self.expression( 682 exp.Literal, this=token.text, is_string=False 683 ), 684 } 685 686 PRIMARY_PARSERS = { 687 **STRING_PARSERS, 688 **NUMERIC_PARSERS, 689 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 690 TokenType.NULL: lambda self, _: self.expression(exp.Null), 691 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 692 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 693 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 694 TokenType.STAR: lambda self, _: self.expression( 695 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 696 ), 697 } 698 699 PLACEHOLDER_PARSERS = { 700 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 701 TokenType.PARAMETER: lambda self: self._parse_parameter(), 702 TokenType.COLON: lambda self: ( 703 self.expression(exp.Placeholder, 
this=self._prev.text) 704 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 705 else None 706 ), 707 } 708 709 RANGE_PARSERS = { 710 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 711 TokenType.GLOB: binary_range_parser(exp.Glob), 712 TokenType.ILIKE: binary_range_parser(exp.ILike), 713 TokenType.IN: lambda self, this: self._parse_in(this), 714 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 715 TokenType.IS: lambda self, this: self._parse_is(this), 716 TokenType.LIKE: binary_range_parser(exp.Like), 717 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 718 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 719 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 720 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 721 } 722 723 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 724 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 725 "AUTO": lambda self: self._parse_auto_property(), 726 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 727 "BACKUP": lambda self: self.expression( 728 exp.BackupProperty, this=self._parse_var(any_token=True) 729 ), 730 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 731 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 732 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 733 "CHECKSUM": lambda self: self._parse_checksum(), 734 "CLUSTER BY": lambda self: self._parse_cluster(), 735 "CLUSTERED": lambda self: self._parse_clustered_by(), 736 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 737 exp.CollateProperty, **kwargs 738 ), 739 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 740 "CONTAINS": lambda self: self._parse_contains_property(), 741 "COPY": lambda self: self._parse_copy_property(), 742 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 
743 "DEFINER": lambda self: self._parse_definer(), 744 "DETERMINISTIC": lambda self: self.expression( 745 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 746 ), 747 "DISTKEY": lambda self: self._parse_distkey(), 748 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 749 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 750 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 751 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 752 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 753 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 754 "FREESPACE": lambda self: self._parse_freespace(), 755 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 756 "HEAP": lambda self: self.expression(exp.HeapProperty), 757 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 758 "IMMUTABLE": lambda self: self.expression( 759 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 760 ), 761 "INHERITS": lambda self: self.expression( 762 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 763 ), 764 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 765 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 766 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 767 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 768 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 769 "LIKE": lambda self: self._parse_create_like(), 770 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 771 "LOCK": lambda self: self._parse_locking(), 772 "LOCKING": lambda self: self._parse_locking(), 773 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 774 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 775 "MERGEBLOCKRATIO": lambda self, **kwargs: 
self._parse_mergeblockratio(**kwargs), 776 "MODIFIES": lambda self: self._parse_modifies_property(), 777 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 778 "NO": lambda self: self._parse_no_property(), 779 "ON": lambda self: self._parse_on_property(), 780 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 781 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 782 "PARTITION": lambda self: self._parse_partitioned_of(), 783 "PARTITION BY": lambda self: self._parse_partitioned_by(), 784 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 785 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 786 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 787 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 788 "READS": lambda self: self._parse_reads_property(), 789 "REMOTE": lambda self: self._parse_remote_with_connection(), 790 "RETURNS": lambda self: self._parse_returns(), 791 "ROW": lambda self: self._parse_row(), 792 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 793 "SAMPLE": lambda self: self.expression( 794 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 795 ), 796 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 797 "SETTINGS": lambda self: self.expression( 798 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 799 ), 800 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 801 "SORTKEY": lambda self: self._parse_sortkey(), 802 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 803 "STABLE": lambda self: self.expression( 804 exp.StabilityProperty, this=exp.Literal.string("STABLE") 805 ), 806 "STORED": lambda self: self._parse_stored(), 807 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 808 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 809 "TEMP": lambda 
self: self.expression(exp.TemporaryProperty), 810 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 811 "TO": lambda self: self._parse_to_table(), 812 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 813 "TRANSFORM": lambda self: self.expression( 814 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 815 ), 816 "TTL": lambda self: self._parse_ttl(), 817 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 818 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 819 "VOLATILE": lambda self: self._parse_volatile_property(), 820 "WITH": lambda self: self._parse_with_property(), 821 } 822 823 CONSTRAINT_PARSERS = { 824 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 825 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 826 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 827 "CHARACTER SET": lambda self: self.expression( 828 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 829 ), 830 "CHECK": lambda self: self.expression( 831 exp.CheckColumnConstraint, 832 this=self._parse_wrapped(self._parse_conjunction), 833 enforced=self._match_text_seq("ENFORCED"), 834 ), 835 "COLLATE": lambda self: self.expression( 836 exp.CollateColumnConstraint, this=self._parse_var() 837 ), 838 "COMMENT": lambda self: self.expression( 839 exp.CommentColumnConstraint, this=self._parse_string() 840 ), 841 "COMPRESS": lambda self: self._parse_compress(), 842 "CLUSTERED": lambda self: self.expression( 843 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 844 ), 845 "NONCLUSTERED": lambda self: self.expression( 846 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 847 ), 848 "DEFAULT": lambda self: self.expression( 849 exp.DefaultColumnConstraint, this=self._parse_bitwise() 850 ), 851 "ENCODE": lambda self: 
self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 852 "EXCLUDE": lambda self: self.expression( 853 exp.ExcludeColumnConstraint, this=self._parse_index_params() 854 ), 855 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 856 "FORMAT": lambda self: self.expression( 857 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 858 ), 859 "GENERATED": lambda self: self._parse_generated_as_identity(), 860 "IDENTITY": lambda self: self._parse_auto_increment(), 861 "INLINE": lambda self: self._parse_inline(), 862 "LIKE": lambda self: self._parse_create_like(), 863 "NOT": lambda self: self._parse_not_constraint(), 864 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 865 "ON": lambda self: ( 866 self._match(TokenType.UPDATE) 867 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 868 ) 869 or self.expression(exp.OnProperty, this=self._parse_id_var()), 870 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 871 "PERIOD": lambda self: self._parse_period_for_system_time(), 872 "PRIMARY KEY": lambda self: self._parse_primary_key(), 873 "REFERENCES": lambda self: self._parse_references(match=False), 874 "TITLE": lambda self: self.expression( 875 exp.TitleColumnConstraint, this=self._parse_var_or_string() 876 ), 877 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 878 "UNIQUE": lambda self: self._parse_unique(), 879 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 880 "WITH": lambda self: self.expression( 881 exp.Properties, expressions=self._parse_wrapped_properties() 882 ), 883 } 884 885 ALTER_PARSERS = { 886 "ADD": lambda self: self._parse_alter_table_add(), 887 "ALTER": lambda self: self._parse_alter_table_alter(), 888 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 889 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 890 "DROP": 
lambda self: self._parse_alter_table_drop(), 891 "RENAME": lambda self: self._parse_alter_table_rename(), 892 } 893 894 SCHEMA_UNNAMED_CONSTRAINTS = { 895 "CHECK", 896 "EXCLUDE", 897 "FOREIGN KEY", 898 "LIKE", 899 "PERIOD", 900 "PRIMARY KEY", 901 "UNIQUE", 902 } 903 904 NO_PAREN_FUNCTION_PARSERS = { 905 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 906 "CASE": lambda self: self._parse_case(), 907 "IF": lambda self: self._parse_if(), 908 "NEXT": lambda self: self._parse_next_value_for(), 909 } 910 911 INVALID_FUNC_NAME_TOKENS = { 912 TokenType.IDENTIFIER, 913 TokenType.STRING, 914 } 915 916 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 917 918 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 919 920 FUNCTION_PARSERS = { 921 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 922 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 923 "DECODE": lambda self: self._parse_decode(), 924 "EXTRACT": lambda self: self._parse_extract(), 925 "JSON_OBJECT": lambda self: self._parse_json_object(), 926 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 927 "JSON_TABLE": lambda self: self._parse_json_table(), 928 "MATCH": lambda self: self._parse_match_against(), 929 "OPENJSON": lambda self: self._parse_open_json(), 930 "POSITION": lambda self: self._parse_position(), 931 "PREDICT": lambda self: self._parse_predict(), 932 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 933 "STRING_AGG": lambda self: self._parse_string_agg(), 934 "SUBSTRING": lambda self: self._parse_substring(), 935 "TRIM": lambda self: self._parse_trim(), 936 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 937 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 938 } 939 940 QUERY_MODIFIER_PARSERS = { 941 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 942 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 943 TokenType.WHERE: lambda self: 
("where", self._parse_where()), 944 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 945 TokenType.HAVING: lambda self: ("having", self._parse_having()), 946 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 947 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 948 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 949 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 950 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 951 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 952 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 953 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 954 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 955 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 956 TokenType.CLUSTER_BY: lambda self: ( 957 "cluster", 958 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 959 ), 960 TokenType.DISTRIBUTE_BY: lambda self: ( 961 "distribute", 962 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 963 ), 964 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 965 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 966 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 967 } 968 969 SET_PARSERS = { 970 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 971 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 972 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 973 "TRANSACTION": lambda self: self._parse_set_transaction(), 974 } 975 976 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 977 978 TYPE_LITERAL_PARSERS = { 979 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 980 } 981 982 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 983 
984 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 985 986 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 987 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 988 "ISOLATION": ( 989 ("LEVEL", "REPEATABLE", "READ"), 990 ("LEVEL", "READ", "COMMITTED"), 991 ("LEVEL", "READ", "UNCOMITTED"), 992 ("LEVEL", "SERIALIZABLE"), 993 ), 994 "READ": ("WRITE", "ONLY"), 995 } 996 997 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 998 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 999 ) 1000 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1001 1002 CREATE_SEQUENCE: OPTIONS_TYPE = { 1003 "SCALE": ("EXTEND", "NOEXTEND"), 1004 "SHARD": ("EXTEND", "NOEXTEND"), 1005 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1006 **dict.fromkeys( 1007 ( 1008 "SESSION", 1009 "GLOBAL", 1010 "KEEP", 1011 "NOKEEP", 1012 "ORDER", 1013 "NOORDER", 1014 "NOCACHE", 1015 "CYCLE", 1016 "NOCYCLE", 1017 "NOMINVALUE", 1018 "NOMAXVALUE", 1019 "NOSCALE", 1020 "NOSHARD", 1021 ), 1022 tuple(), 1023 ), 1024 } 1025 1026 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1027 1028 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1029 1030 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1031 1032 CLONE_KEYWORDS = {"CLONE", "COPY"} 1033 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1034 1035 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1036 1037 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1038 1039 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1040 1041 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1042 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1043 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1044 1045 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1046 1047 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 
    # Tokens that may introduce an ALTER TABLE ADD constraint.
    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    # Tokens usable as the OFFSET alias in UNNEST(...) WITH OFFSET <alias>.
    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    # Whether CAST should be parsed strictly (raise on failure) by default.
    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Per-instance state only; keeps Parser instances small and attribute access fast.
    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """
        Args:
            error_level: How parse errors are surfaced; defaults to IMMEDIATE (raise on first error).
            error_message_context: Number of characters of surrounding SQL shown in error messages.
            max_errors: Maximum number of error messages concatenated into a ParseError.
            dialect: The SQL dialect (name, class, or instance) to parse with.
        """
        # Imported locally to avoid a circular import at module load time.
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        """Clears all per-parse state so the instance can be reused for another statement."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        # Token cursor: current, one-token lookahead, and the previously consumed token.
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        # The unbound class method is passed so subclasses dispatch to their own
        # _parse_statement override.
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type.
        If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Tag each failure with the type we were attempting, so the final
                # merged error explains every attempt.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Core driver: splits the token stream on semicolons into per-statement
        # chunks, then runs `parse_method` over each chunk.
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon does not start a new (empty) chunk.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            # _advance() moves from -1 onto the first token of the chunk.
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement was not fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The offending span is underlined with ANSI escape codes.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
1281 """ 1282 instance = exp_class(**kwargs) 1283 instance.add_comments(comments) if comments else self._add_comments(instance) 1284 return self.validate_expression(instance) 1285 1286 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1287 if expression and self._prev_comments: 1288 expression.add_comments(self._prev_comments) 1289 self._prev_comments = None 1290 1291 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1292 """ 1293 Validates an Expression, making sure that all its mandatory arguments are set. 1294 1295 Args: 1296 expression: The expression to validate. 1297 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1298 1299 Returns: 1300 The validated expression. 1301 """ 1302 if self.error_level != ErrorLevel.IGNORE: 1303 for error_message in expression.error_messages(args): 1304 self.raise_error(error_message) 1305 1306 return expression 1307 1308 def _find_sql(self, start: Token, end: Token) -> str: 1309 return self.sql[start.start : end.end + 1] 1310 1311 def _is_connected(self) -> bool: 1312 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1313 1314 def _advance(self, times: int = 1) -> None: 1315 self._index += times 1316 self._curr = seq_get(self._tokens, self._index) 1317 self._next = seq_get(self._tokens, self._index + 1) 1318 1319 if self._index > 0: 1320 self._prev = self._tokens[self._index - 1] 1321 self._prev_comments = self._prev.comments 1322 else: 1323 self._prev = None 1324 self._prev_comments = None 1325 1326 def _retreat(self, index: int) -> None: 1327 if index != self._index: 1328 self._advance(index - self._index) 1329 1330 def _warn_unsupported(self) -> None: 1331 if len(self._tokens) <= 1: 1332 return 1333 1334 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1335 # interested in emitting a warning for the one being currently processed. 
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        # Fallback for statements sqlglot cannot fully parse: wraps the keyword
        # (the previously consumed token) and the remaining text as a Command.
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # Parses COMMENT [IF EXISTS] ON <kind> <name> IS <string>.
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown object kind: fall back to an opaque Command.
            return self._parse_as_command(start)

        # The commented object is parsed according to its kind.
        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        # Parses the TO <table> property (e.g. ClickHouse materialized views).
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # Each TTL entry is an expression optionally followed by an action
            # (DELETE / RECOMPRESS / TO DISK / TO VOLUME).
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            # No action keyword: the bare expression is the TTL entry.
            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        # Optional SET <assignments> after GROUP BY (ClickHouse TTL aggregation).
        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        # Entry point for a single statement: dispatch on the first token via
        # STATEMENT_PARSERS, fall back to commands, then to a bare expression
        # or SELECT with query modifiers.
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        # Parses DROP [TEMPORARY] [MATERIALIZED] <kind> [IF EXISTS] <name> ...
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            # Unknown object kind: fall back to an opaque Command.
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        # Optional parenthesized type list (e.g. dropping a function signature).
        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
comments=start.comments, 1453 exists=if_exists, 1454 this=table, 1455 expressions=expressions, 1456 kind=kind, 1457 temporary=temporary, 1458 materialized=materialized, 1459 cascade=self._match_text_seq("CASCADE"), 1460 constraints=self._match_text_seq("CONSTRAINTS"), 1461 purge=self._match_text_seq("PURGE"), 1462 ) 1463 1464 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1465 return ( 1466 self._match_text_seq("IF") 1467 and (not not_ or self._match(TokenType.NOT)) 1468 and self._match(TokenType.EXISTS) 1469 ) 1470 1471 def _parse_create(self) -> exp.Create | exp.Command: 1472 # Note: this can't be None because we've matched a statement parser 1473 start = self._prev 1474 comments = self._prev_comments 1475 1476 replace = ( 1477 start.token_type == TokenType.REPLACE 1478 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1479 or self._match_pair(TokenType.OR, TokenType.ALTER) 1480 ) 1481 1482 unique = self._match(TokenType.UNIQUE) 1483 1484 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1485 self._advance() 1486 1487 properties = None 1488 create_token = self._match_set(self.CREATABLES) and self._prev 1489 1490 if not create_token: 1491 # exp.Properties.Location.POST_CREATE 1492 properties = self._parse_properties() 1493 create_token = self._match_set(self.CREATABLES) and self._prev 1494 1495 if not properties or not create_token: 1496 return self._parse_as_command(start) 1497 1498 exists = self._parse_exists(not_=True) 1499 this = None 1500 expression: t.Optional[exp.Expression] = None 1501 indexes = None 1502 no_schema_binding = None 1503 begin = None 1504 end = None 1505 clone = None 1506 1507 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1508 nonlocal properties 1509 if properties and temp_props: 1510 properties.expressions.extend(temp_props.expressions) 1511 elif temp_props: 1512 properties = temp_props 1513 1514 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1515 this 
= self._parse_user_defined_function(kind=create_token.token_type) 1516 1517 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1518 extend_props(self._parse_properties()) 1519 1520 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1521 1522 if not expression: 1523 if self._match(TokenType.COMMAND): 1524 expression = self._parse_as_command(self._prev) 1525 else: 1526 begin = self._match(TokenType.BEGIN) 1527 return_ = self._match_text_seq("RETURN") 1528 1529 if self._match(TokenType.STRING, advance=False): 1530 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1531 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1532 expression = self._parse_string() 1533 extend_props(self._parse_properties()) 1534 else: 1535 expression = self._parse_statement() 1536 1537 end = self._match_text_seq("END") 1538 1539 if return_: 1540 expression = self.expression(exp.Return, this=expression) 1541 elif create_token.token_type == TokenType.INDEX: 1542 this = self._parse_index(index=self._parse_id_var()) 1543 elif create_token.token_type in self.DB_CREATABLES: 1544 table_parts = self._parse_table_parts( 1545 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1546 ) 1547 1548 # exp.Properties.Location.POST_NAME 1549 self._match(TokenType.COMMA) 1550 extend_props(self._parse_properties(before=True)) 1551 1552 this = self._parse_schema(this=table_parts) 1553 1554 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1555 extend_props(self._parse_properties()) 1556 1557 self._match(TokenType.ALIAS) 1558 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1559 # exp.Properties.Location.POST_ALIAS 1560 extend_props(self._parse_properties()) 1561 1562 if create_token.token_type == TokenType.SEQUENCE: 1563 expression = self._parse_types() 1564 extend_props(self._parse_properties()) 1565 else: 1566 expression = 
self._parse_ddl_select() 1567 1568 if create_token.token_type == TokenType.TABLE: 1569 # exp.Properties.Location.POST_EXPRESSION 1570 extend_props(self._parse_properties()) 1571 1572 indexes = [] 1573 while True: 1574 index = self._parse_index() 1575 1576 # exp.Properties.Location.POST_INDEX 1577 extend_props(self._parse_properties()) 1578 1579 if not index: 1580 break 1581 else: 1582 self._match(TokenType.COMMA) 1583 indexes.append(index) 1584 elif create_token.token_type == TokenType.VIEW: 1585 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1586 no_schema_binding = True 1587 1588 shallow = self._match_text_seq("SHALLOW") 1589 1590 if self._match_texts(self.CLONE_KEYWORDS): 1591 copy = self._prev.text.lower() == "copy" 1592 clone = self.expression( 1593 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1594 ) 1595 1596 if self._curr: 1597 return self._parse_as_command(start) 1598 1599 return self.expression( 1600 exp.Create, 1601 comments=comments, 1602 this=this, 1603 kind=create_token.text.upper(), 1604 replace=replace, 1605 unique=unique, 1606 expression=expression, 1607 exists=exists, 1608 properties=properties, 1609 indexes=indexes, 1610 no_schema_binding=no_schema_binding, 1611 begin=begin, 1612 end=end, 1613 clone=clone, 1614 ) 1615 1616 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1617 seq = exp.SequenceProperties() 1618 1619 options = [] 1620 index = self._index 1621 1622 while self._curr: 1623 if self._match_text_seq("INCREMENT"): 1624 self._match_text_seq("BY") 1625 self._match_text_seq("=") 1626 seq.set("increment", self._parse_term()) 1627 elif self._match_text_seq("MINVALUE"): 1628 seq.set("minvalue", self._parse_term()) 1629 elif self._match_text_seq("MAXVALUE"): 1630 seq.set("maxvalue", self._parse_term()) 1631 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1632 self._match_text_seq("=") 1633 seq.set("start", self._parse_term()) 1634 elif 
self._match_text_seq("CACHE"): 1635 # T-SQL allows empty CACHE which is initialized dynamically 1636 seq.set("cache", self._parse_number() or True) 1637 elif self._match_text_seq("OWNED", "BY"): 1638 # "OWNED BY NONE" is the default 1639 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1640 else: 1641 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1642 if opt: 1643 options.append(opt) 1644 else: 1645 break 1646 1647 seq.set("options", options if options else None) 1648 return None if self._index == index else seq 1649 1650 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1651 # only used for teradata currently 1652 self._match(TokenType.COMMA) 1653 1654 kwargs = { 1655 "no": self._match_text_seq("NO"), 1656 "dual": self._match_text_seq("DUAL"), 1657 "before": self._match_text_seq("BEFORE"), 1658 "default": self._match_text_seq("DEFAULT"), 1659 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1660 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1661 "after": self._match_text_seq("AFTER"), 1662 "minimum": self._match_texts(("MIN", "MINIMUM")), 1663 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1664 } 1665 1666 if self._match_texts(self.PROPERTY_PARSERS): 1667 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1668 try: 1669 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1670 except TypeError: 1671 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1672 1673 return None 1674 1675 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1676 return self._parse_wrapped_csv(self._parse_property) 1677 1678 def _parse_property(self) -> t.Optional[exp.Expression]: 1679 if self._match_texts(self.PROPERTY_PARSERS): 1680 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1681 1682 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1683 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, 
default=True) 1684 1685 if self._match_text_seq("COMPOUND", "SORTKEY"): 1686 return self._parse_sortkey(compound=True) 1687 1688 if self._match_text_seq("SQL", "SECURITY"): 1689 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1690 1691 index = self._index 1692 key = self._parse_column() 1693 1694 if not self._match(TokenType.EQ): 1695 self._retreat(index) 1696 return self._parse_sequence_properties() 1697 1698 return self.expression( 1699 exp.Property, 1700 this=key.to_dot() if isinstance(key, exp.Column) else key, 1701 value=self._parse_column() or self._parse_var(any_token=True), 1702 ) 1703 1704 def _parse_stored(self) -> exp.FileFormatProperty: 1705 self._match(TokenType.ALIAS) 1706 1707 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1708 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1709 1710 return self.expression( 1711 exp.FileFormatProperty, 1712 this=( 1713 self.expression( 1714 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1715 ) 1716 if input_format or output_format 1717 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1718 ), 1719 ) 1720 1721 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1722 self._match(TokenType.EQ) 1723 self._match(TokenType.ALIAS) 1724 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1725 1726 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1727 properties = [] 1728 while True: 1729 if before: 1730 prop = self._parse_property_before() 1731 else: 1732 prop = self._parse_property() 1733 if not prop: 1734 break 1735 for p in ensure_list(prop): 1736 properties.append(p) 1737 1738 if properties: 1739 return self.expression(exp.Properties, expressions=properties) 1740 1741 return None 1742 1743 def _parse_fallback(self, no: bool = False) -> 
exp.FallbackProperty: 1744 return self.expression( 1745 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1746 ) 1747 1748 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1749 if self._index >= 2: 1750 pre_volatile_token = self._tokens[self._index - 2] 1751 else: 1752 pre_volatile_token = None 1753 1754 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1755 return exp.VolatileProperty() 1756 1757 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1758 1759 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1760 self._match_pair(TokenType.EQ, TokenType.ON) 1761 1762 prop = self.expression(exp.WithSystemVersioningProperty) 1763 if self._match(TokenType.L_PAREN): 1764 self._match_text_seq("HISTORY_TABLE", "=") 1765 prop.set("this", self._parse_table_parts()) 1766 1767 if self._match(TokenType.COMMA): 1768 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1769 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1770 1771 self._match_r_paren() 1772 1773 return prop 1774 1775 def _parse_with_property( 1776 self, 1777 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1778 if self._match(TokenType.L_PAREN, advance=False): 1779 return self._parse_wrapped_properties() 1780 1781 if self._match_text_seq("JOURNAL"): 1782 return self._parse_withjournaltable() 1783 1784 if self._match_text_seq("DATA"): 1785 return self._parse_withdata(no=False) 1786 elif self._match_text_seq("NO", "DATA"): 1787 return self._parse_withdata(no=True) 1788 1789 if not self._next: 1790 return None 1791 1792 return self._parse_withisolatedloading() 1793 1794 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1795 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1796 self._match(TokenType.EQ) 1797 1798 user = self._parse_id_var() 1799 self._match(TokenType.PARAMETER) 1800 host = self._parse_id_var() 
    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parse Teradata's WITH JOURNAL TABLE = <table>."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        """Build a LOG / NO LOG property (the keywords were already consumed)."""
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Build a JOURNAL property from flags matched by the caller."""
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parse CHECKSUM = ON|OFF [DEFAULT]; `on` stays None when neither matched."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        """Parse CLUSTER BY expressions, parenthesized when `wrapped` is True."""
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse Hive's CLUSTERED BY (...) [SORTED BY (...)] INTO n BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        """Parse COPY GRANTS; backtrack over the COPY token if GRANTS doesn't follow."""
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)
    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parse FREESPACE = <number> [PERCENT]."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parse MERGEBLOCKRATIO; explicit `= n [PERCENT]` wins over NO/DEFAULT flags."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parse DATABLOCKSIZE = <size> [BYTES|KBYTES|KILOBYTES]."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse BLOCKCOMPRESSION = ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP(...)]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        """Parse [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )
    def _parse_locking(self) -> exp.LockingProperty:
        """Parse Teradata LOCKING <kind> [<table>] FOR|IN <lock type> [OVERRIDE]."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only object-level kinds name a target; ROW locking does not
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parse an optional PARTITION BY clause into a list of expressions."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a Postgres partition bound: IN (...), FROM (...) TO (...), or WITH (MODULUS m, REMAINDER r)."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE/MAXVALUE are keywords here, not column references
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )
    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parse PARTITION OF <parent> {DEFAULT | FOR VALUES <bound spec>}."""
        if not self._match_text_seq("OF"):
            # PARTITION was consumed by the caller; step back so it can be re-read
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            # NOTE(review): if the error level doesn't raise, `expression` may be
            # unbound on the return below — presumably raise_error always aborts
            # in practice; confirm against the error-level handling.
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse PARTITIONED BY with either a column schema or a bracketed field."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )
    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse WITH [NO] DATA [AND [NO] STATISTICS]; statistics is None if absent."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse the SQL-routine characteristic CONTAINS SQL."""
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse the SQL-routine characteristic MODIFIES SQL DATA."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        """Parse what follows NO: PRIMARY INDEX (Teradata) or SQL (routine characteristic)."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse what follows ON: COMMIT PRESERVE/DELETE ROWS, or a generic ON target."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse the SQL-routine characteristic READS SQL DATA."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse Redshift's DISTKEY(<column>)."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse CREATE TABLE ... LIKE <table> [INCLUDING|EXCLUDING <option>]*."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse Redshift's [COMPOUND] SORTKEY(<columns>)."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse CHARACTER SET [=] <charset>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse BigQuery's REMOTE WITH CONNECTION <connection> model property."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: a scalar type, TABLE schema, or TABLE<...> struct form."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        """Parse DESCRIBE [<creatable kind>] [EXTENDED] <table> [<properties>]."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        extended = self._match_text_seq("EXTENDED")
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions
        )
    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement (including OVERWRITE/IGNORE/DIRECTORY variants)."""
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            # Hive: INSERT OVERWRITE [LOCAL] DIRECTORY '<path>' [row format]
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. sqlite's INSERT OR REPLACE/IGNORE/...
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = self._parse_table(schema=True) if not is_function else self._parse_function()

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            # RETURNING may appear before or after the source expression
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse KILL [CONNECTION|QUERY] <id>."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )
    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT (Postgres) or ON DUPLICATE KEY (MySQL) clauses."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        # Only DO UPDATE carries a SET assignment list
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse RETURNING <exprs> [INTO <target>]."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ROW FORMAT ... (the ROW token was consumed by the caller)."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse Hive ROW FORMAT SERDE '<class>' or ROW FORMAT DELIMITED ... clauses."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_properties()
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse Hive's LOAD DATA ...; anything else falls back to a raw Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement, including MySQL's multiple-table syntax."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            # RETURNING may appear before or after WHERE
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )
    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse Spark's UNCACHE TABLE [IF EXISTS] <table>."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse Spark's CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            # Stored as a flat [key, value] pair
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )
    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse PARTITION (<exprs>)."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse one VALUES row: a parenthesized tuple or a single bare expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        return self.expression(exp.Tuple, expressions=[self._parse_expression()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list (hook for dialect overrides)."""
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: CTEs, projections, modifiers, subqueries, VALUES."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            # e.g. BigQuery's SELECT AS STRUCT / AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse WITH [RECURSIVE] <cte> [, <cte>]* into an exp.With node."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # Tolerate a repeated WITH between CTEs as a separator
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )
    def _parse_cte(self) -> exp.CTE:
        """Parse one CTE: <alias> AS [NOT] [MATERIALIZED] (<statement>)."""
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse [AS] <alias> [(<columns>)]; None if neither alias nor columns found."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Backtrack if the parens were not a column list
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in a Subquery node, attaching pivots and an optional alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )
    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite comma-joined references to earlier sources into explicit UNNESTs (BigQuery-style)."""
        from sqlglot.optimizer.normalize_identifiers import (
            normalize_identifiers as _norm,
        )

        # Names of sources seen so far, normalized for comparison
        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals and trailing clauses (WHERE, GROUP BY, LIMIT, ...) to a query."""
        if isinstance(this, (exp.Query, exp.Table)):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # Split a LIMIT ... OFFSET ... into separate nodes,
                            # moving any LIMIT BY expressions onto the Offset
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args:
            this = self._implicit_unnests_to_explicit(this)

        return this
    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an Oracle-style hint comment /*+ ... */ into an exp.Hint."""
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse SELECT ... INTO [TEMPORARY|UNLOGGED] [TABLE] <table>."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause; `skip_from_token` when FROM was already consumed."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is a regex-like mini-language; capture its raw SQL
            # by balancing parentheses rather than parsing it
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL [VIEW] ... or CROSS/OUTER APPLY ... into an exp.Lateral."""
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            # Tri-state: True = CROSS APPLY, False = OUTER APPLY, None = LATERAL
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: an UNNEST, function call or dotted reference
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )
    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Match the (method, side, kind) token triple that may precede JOIN."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one join clause (comma join, [method/side/kind] JOIN, or CROSS/OUTER APPLY)."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # The matched tokens were not a join prefix after all
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # Support nested joins like `a JOIN b JOIN c ON ...` by attaching a
            # trailing join (with its own condition) to the right-hand table
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)
self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        # No JOIN keyword follows the modifiers: rewind unless the caller
        # said the keyword is optional (skip_join_token).
        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # Try a nested join whose ON/USING belongs to this join; undo if
            # no such condition actually follows.
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an expression optionally followed by an operator class name."""
        this = self._parse_conjunction()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        """Parse the trailing parameters of CREATE INDEX (USING, columns, INCLUDE, ...)."""
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
        )

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition; when `index` is given, only ON <table> + params."""
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse WITH (...) or index table hints following a table; None when absent."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            #
 https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dot-separated component of a table name."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a (possibly qualified) table name into catalog/db/table parts.

        Raises a parse error when the expected table (or database, for
        is_db_reference) component is missing.
        """
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        if is_db_reference:
            # Shift the parts left: what was parsed as the table is the db
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any FROM-clause table factor: lateral, unnest, values, subquery or name."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias =
self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse a temporal/versioned table clause (AS OF, BETWEEN, CONTAINED IN, ALL)."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse UNNEST(...) with optional alias and WITH ORDINALITY / WITH OFFSET."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                # The single alias names the column, not the table
                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                # The last column alias names the ordinality column
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        """Parse a VALUES clause, optionally parenthesized as a derived table."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match_text_seq("VALUES"):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        """Parse a TABLESAMPLE (or USING SAMPLE, as_modifier) clause; None when absent."""
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren =
self._match(TokenType.L_PAREN) 3225 3226 if self.TABLESAMPLE_CSV: 3227 num = None 3228 expressions = self._parse_csv(self._parse_primary) 3229 else: 3230 expressions = None 3231 num = ( 3232 self._parse_factor() 3233 if self._match(TokenType.NUMBER, advance=False) 3234 else self._parse_primary() or self._parse_placeholder() 3235 ) 3236 3237 if self._match_text_seq("BUCKET"): 3238 bucket_numerator = self._parse_number() 3239 self._match_text_seq("OUT", "OF") 3240 bucket_denominator = bucket_denominator = self._parse_number() 3241 self._match(TokenType.ON) 3242 bucket_field = self._parse_field() 3243 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3244 percent = num 3245 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3246 size = num 3247 else: 3248 percent = num 3249 3250 if matched_l_paren: 3251 self._match_r_paren() 3252 3253 if self._match(TokenType.L_PAREN): 3254 method = self._parse_var(upper=True) 3255 seed = self._match(TokenType.COMMA) and self._parse_number() 3256 self._match_r_paren() 3257 elif self._match_texts(("SEED", "REPEATABLE")): 3258 seed = self._parse_wrapped(self._parse_number) 3259 3260 return self.expression( 3261 exp.TableSample, 3262 expressions=expressions, 3263 method=method, 3264 bucket_numerator=bucket_numerator, 3265 bucket_denominator=bucket_denominator, 3266 bucket_field=bucket_field, 3267 percent=percent, 3268 size=size, 3269 seed=seed, 3270 ) 3271 3272 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3273 return list(iter(self._parse_pivot, None)) or None 3274 3275 # https://duckdb.org/docs/sql/statements/pivot 3276 def _parse_simplified_pivot(self) -> exp.Pivot: 3277 def _parse_on() -> t.Optional[exp.Expression]: 3278 this = self._parse_bitwise() 3279 return self._parse_in(this) if self._match(TokenType.IN) else this 3280 3281 this = self._parse_table() 3282 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3283 using = self._match(TokenType.USING) and 
self._parse_csv( 3284 lambda: self._parse_alias(self._parse_function()) 3285 ) 3286 group = self._parse_group() 3287 return self.expression( 3288 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3289 ) 3290 3291 def _parse_pivot_in(self) -> exp.In: 3292 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3293 this = self._parse_conjunction() 3294 3295 self._match(TokenType.ALIAS) 3296 alias = self._parse_field() 3297 if alias: 3298 return self.expression(exp.PivotAlias, this=this, alias=alias) 3299 3300 return this 3301 3302 value = self._parse_column() 3303 3304 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3305 self.raise_error("Expecting IN (") 3306 3307 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3308 3309 self._match_r_paren() 3310 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3311 3312 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3313 index = self._index 3314 include_nulls = None 3315 3316 if self._match(TokenType.PIVOT): 3317 unpivot = False 3318 elif self._match(TokenType.UNPIVOT): 3319 unpivot = True 3320 3321 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3322 if self._match_text_seq("INCLUDE", "NULLS"): 3323 include_nulls = True 3324 elif self._match_text_seq("EXCLUDE", "NULLS"): 3325 include_nulls = False 3326 else: 3327 return None 3328 3329 expressions = [] 3330 3331 if not self._match(TokenType.L_PAREN): 3332 self._retreat(index) 3333 return None 3334 3335 if unpivot: 3336 expressions = self._parse_csv(self._parse_column) 3337 else: 3338 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3339 3340 if not expressions: 3341 self.raise_error("Failed to parse PIVOT's aggregation list") 3342 3343 if not self._match(TokenType.FOR): 3344 self.raise_error("Expecting FOR") 3345 3346 field = self._parse_pivot_in() 3347 3348 self._match_r_paren() 3349 3350 pivot = self.expression( 
exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names produced by the pivot
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Default pivot column names: the aliases of the aggregations."""
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        """Parse a PREWHERE clause; None when absent."""
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause; None when absent."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY with GROUPING SETS / ROLLUP / CUBE / TOTALS variants."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if
 skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        # PRIOR is only meaningful inside CONNECT BY, so register its parser
        # temporarily and remove it right after.
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        # START WITH may also appear after CONNECT BY
        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_name_as_expression(self) -> exp.Alias:
        """Parse `name AS expr` (used e.g. by DEFINE and INTERPOLATE lists)."""
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse an INTERPOLATE (...) list; None when absent."""
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER [SIBLINGS] BY; returns `this` unchanged when absent."""
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Generic parser for SORT BY-style clauses keyed by `token`."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse one ORDER BY element, including null ordering and WITH FILL."""
        this = parse_method() if parse_method else self._parse_conjunction()
        if not this:
            return None

        asc = self._match(TokenType.ASC)
        # NOTE(review): `(asc and False)` looks dead, but it presumably keeps
        # desc as an explicit False (rather than a no-match None) when ASC was
        # written out — confirm before simplifying.
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # Infer the dialect's default null ordering when none was written
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT/TOP or FETCH; returns `this` unchanged when neither is present."""
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # LIMIT <offset>, <count> form
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
 expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause; returns `this` unchanged when absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a trailing BY <exprs> list (LIMIT ... BY); falsy when absent."""
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE locking clauses."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait is tri-state: True = NOWAIT, an expression = WAIT <n>,
            # False = SKIP LOCKED, None = unspecified
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold UNION/EXCEPT/INTERSECT operands into a left-deep tree rooted at `this`."""
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            # DISTINCT is the default unless ALL was written
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            expression = this.expression

            if expression:
                # Hoist trailing modifiers from the last operand onto the union
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly aliased) scalar expression."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range predicates (IN, BETWEEN, LIKE, ISNULL/NOTNULL, IS ...)."""
        this = this or self._parse_bitwise()
        negate =
 self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of IS [NOT] <null/boolean/DISTINCT FROM>; None on no match."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # Not an IS predicate after all: rewind past the IS token
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of IN: an unnest, a subquery, a list, or a field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        """Parse `low AND high` following BETWEEN."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an ESCAPE expression when an ESCAPE clause follows."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]:
        """Parse an interval, normalizing toward INTERVAL '<value>' <unit>."""
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            # e.g. `interval IS ...` — INTERVAL was an identifier, not a literal
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g.
a "window side") 3824 unit = None 3825 self._retreat(self._index - 1) 3826 3827 this = exp.Literal.string(parts[0]) 3828 unit = self.expression(exp.Var, this=parts[1].upper()) 3829 3830 return self.expression(exp.Interval, this=this, unit=unit) 3831 3832 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3833 this = self._parse_term() 3834 3835 while True: 3836 if self._match_set(self.BITWISE): 3837 this = self.expression( 3838 self.BITWISE[self._prev.token_type], 3839 this=this, 3840 expression=self._parse_term(), 3841 ) 3842 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3843 this = self.expression( 3844 exp.DPipe, 3845 this=this, 3846 expression=self._parse_term(), 3847 safe=not self.dialect.STRICT_STRING_CONCAT, 3848 ) 3849 elif self._match(TokenType.DQMARK): 3850 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3851 elif self._match_pair(TokenType.LT, TokenType.LT): 3852 this = self.expression( 3853 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3854 ) 3855 elif self._match_pair(TokenType.GT, TokenType.GT): 3856 this = self.expression( 3857 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3858 ) 3859 else: 3860 break 3861 3862 return this 3863 3864 def _parse_term(self) -> t.Optional[exp.Expression]: 3865 return self._parse_tokens(self._parse_factor, self.TERM) 3866 3867 def _parse_factor(self) -> t.Optional[exp.Expression]: 3868 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3869 this = parse_method() 3870 3871 while self._match_set(self.FACTOR): 3872 this = self.expression( 3873 self.FACTOR[self._prev.token_type], 3874 this=this, 3875 comments=self._prev_comments, 3876 expression=parse_method(), 3877 ) 3878 if isinstance(this, exp.Div): 3879 this.args["typed"] = self.dialect.TYPED_DIVISION 3880 this.args["safe"] = self.dialect.SAFE_DIVISION 3881 3882 return this 3883 3884 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3885 
    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        """Parse an interval, a typed literal / cast, or fall through to a column.

        Tries (1) an INTERVAL expression (optionally a `+`-joined sum of
        intervals), then (2) a data type followed by a value, then (3) a plain
        column expression.
        """
        interval = parse_interval and self._parse_interval()
        if interval:
            # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
            while True:
                index = self._index
                self._match(TokenType.PLUS)

                # Only continue the sum if a literal follows; otherwise undo
                # the optional PLUS consumption and stop.
                if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
                    self._retreat(index)
                    break

                interval = self.expression(  # type: ignore
                    exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
                )

            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01' — a type keyword applied to a literal
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # A bare type name with no following literal was likely just
                # an identifier — rewind and re-parse as a column.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)
    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type into an `exp.DataType` (or a related node such as
        `exp.PseudoType` / `exp.ObjectIdentifier`), returning None and
        rewinding the token stream if no type can be parsed.

        Args:
            check_func: when True, reject a "type(...)" form that is followed
                by a string literal (it is then likely a function call, not a
                type) by rewinding and returning None.
            schema: propagated to nested type parses (schema context).
            allow_identifiers: when True, a plain identifier may be re-tokenized
                and accepted as a type, or treated as a user-defined type if
                the dialect supports UDTs.
        """
        index = self._index

        # NOTE(review): "SYSUDTLIB." looks like a UDT-library prefix (presumably
        # Teradata) — it is recorded and re-attached to the resulting DataType.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if identifier:
                # Re-tokenize the identifier's text: a quoted identifier such
                # as "INT" may actually denote a type in some dialects.
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    # Pretend the type token was just consumed
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    # Collect a dotted UDT name, e.g. schema.my_type
                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                # e.g. an aggregate-state type: first element is a function or
                # identifier, followed by the argument types.
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Not a well-formed parameterized type — rewind everything
                self._retreat(index)
                return None

            # "type(...)" could still be a function call; remember to check
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax, e.g. ARRAY<INT> / STRUCT<a INT>
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values following the type, e.g. ARRAY<INT>[1, 2]
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                # Same as the plain type; just consume the qualifier
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                # INTERVAL <unit> TO <unit>, e.g. INTERVAL DAY TO SECOND
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))

        if maybe_func and check_func:
            # A string literal right after "type(...)" means this was really a
            # function call such as DATE('...') — rewind and bail out.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                # raise_error may be non-fatal depending on error level, so
                # keep a sane fallback.
                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing "[]" pairs wrap the type in ARRAY, e.g. INT[][]
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this
    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one field inside a STRUCT type, e.g. `name: INT` or `name INT`.

        When `type_required` is True and the parse produced nothing beyond a
        bare name (i.e. no kind/constraints were attached), rewind and
        re-parse the same tokens as a type instead — what looked like a field
        name was actually an unnamed field type.
        """
        index = self._index
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        column_def = self._parse_column_def(this)

        # _parse_column_def returns its input unchanged (same object) when it
        # found no kind and no constraints — hence the identity checks below.
        if type_required and (
            (isinstance(this, exp.Column) and this.this is column_def) or this is column_def
        ):
            self._retreat(index)
            return self._parse_types()

        return column_def
    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators (dots, `::` casts, brackets, ...) to
        `this`, left to right, until no COLUMN_OPERATORS token follows.
        """
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # `expr::type` cast — the right-hand side must be a type
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func) and this:
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                # Rewrite the accumulated Column prefix into a Dot chain so the
                # function call attaches to a plain dotted path.
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # A dot after a column shifts its parts up one level:
                # name -> table, table -> db, db -> catalog
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this
len(expressions) > 1: 4213 this = self.expression(exp.Tuple, expressions=expressions) 4214 else: 4215 this = self.expression(exp.Paren, this=this) 4216 4217 if this: 4218 this.add_comments(comments) 4219 4220 self._match_r_paren(expression=this) 4221 return this 4222 4223 return None 4224 4225 def _parse_field( 4226 self, 4227 any_token: bool = False, 4228 tokens: t.Optional[t.Collection[TokenType]] = None, 4229 anonymous_func: bool = False, 4230 ) -> t.Optional[exp.Expression]: 4231 return ( 4232 self._parse_primary() 4233 or self._parse_function(anonymous=anonymous_func) 4234 or self._parse_id_var(any_token=any_token, tokens=tokens) 4235 ) 4236 4237 def _parse_function( 4238 self, 4239 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4240 anonymous: bool = False, 4241 optional_parens: bool = True, 4242 ) -> t.Optional[exp.Expression]: 4243 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4244 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4245 fn_syntax = False 4246 if ( 4247 self._match(TokenType.L_BRACE, advance=False) 4248 and self._next 4249 and self._next.text.upper() == "FN" 4250 ): 4251 self._advance(2) 4252 fn_syntax = True 4253 4254 func = self._parse_function_call( 4255 functions=functions, anonymous=anonymous, optional_parens=optional_parens 4256 ) 4257 4258 if fn_syntax: 4259 self._match(TokenType.R_BRACE) 4260 4261 return func 4262 4263 def _parse_function_call( 4264 self, 4265 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4266 anonymous: bool = False, 4267 optional_parens: bool = True, 4268 ) -> t.Optional[exp.Expression]: 4269 if not self._curr: 4270 return None 4271 4272 comments = self._curr.comments 4273 token_type = self._curr.token_type 4274 this = self._curr.text 4275 upper = this.upper() 4276 4277 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4278 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4279 self._advance() 4280 return 
    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a single function invocation at the current token.

        Args:
            functions: name -> builder overrides; defaults to self.FUNCTIONS.
            anonymous: when True, skip known-function builders and produce an
                exp.Anonymous node with raw arguments.
            optional_parens: when True, allow functions that take no
                parentheses (e.g. parser entries in NO_PAREN_FUNCTION_PARSERS).

        Returns None (without consuming the function name) when the current
        token cannot start a function call.
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No parentheses follow — only zero-argument no-paren functions
            # (e.g. CURRENT_DATE-style) can match here.
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening parenthesis
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...) — wrap the subquery directly
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                # Normalize aliased args (x AS y) into PropertyEQ pairs
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                # Builders that accept a `dialect` kwarg get it passed through
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Remember the original spelling for round-tripping
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)
self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4345 4346 if not isinstance(e, exp.PropertyEQ): 4347 e = self.expression( 4348 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4349 ) 4350 4351 if isinstance(e.this, exp.Column): 4352 e.this.replace(e.this.this) 4353 4354 transformed.append(e) 4355 4356 return transformed 4357 4358 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4359 return self._parse_column_def(self._parse_id_var()) 4360 4361 def _parse_user_defined_function( 4362 self, kind: t.Optional[TokenType] = None 4363 ) -> t.Optional[exp.Expression]: 4364 this = self._parse_id_var() 4365 4366 while self._match(TokenType.DOT): 4367 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4368 4369 if not self._match(TokenType.L_PAREN): 4370 return this 4371 4372 expressions = self._parse_csv(self._parse_function_parameter) 4373 self._match_r_paren() 4374 return self.expression( 4375 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4376 ) 4377 4378 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4379 literal = self._parse_primary() 4380 if literal: 4381 return self.expression(exp.Introducer, this=token.text, expression=literal) 4382 4383 return self.expression(exp.Identifier, this=token.text) 4384 4385 def _parse_session_parameter(self) -> exp.SessionParameter: 4386 kind = None 4387 this = self._parse_id_var() or self._parse_primary() 4388 4389 if this and self._match(TokenType.DOT): 4390 kind = this.name 4391 this = self._parse_var() or self._parse_primary() 4392 4393 return self.expression(exp.SessionParameter, this=this, kind=kind) 4394 4395 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4396 index = self._index 4397 4398 if self._match(TokenType.L_PAREN): 4399 expressions = t.cast( 4400 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4401 ) 4402 4403 if 
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema definition `(col_def | constraint, ...)`
        attached to `this`, or return `this` unchanged if none follows.
        """
        index = self._index

        if not self.errors:
            # Speculatively try to parse a nested SELECT first: if one parses,
            # the parentheses belong to a subquery, not a schema. Any errors
            # accumulated during the attempt are discarded and the token
            # position is always restored.
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)
this=self._parse_conjunction(), 4467 persisted=self._match_text_seq("PERSISTED"), 4468 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4469 ) 4470 ) 4471 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4472 self._match(TokenType.ALIAS) 4473 constraints.append( 4474 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4475 ) 4476 4477 while True: 4478 constraint = self._parse_column_constraint() 4479 if not constraint: 4480 break 4481 constraints.append(constraint) 4482 4483 if not kind and not constraints: 4484 return this 4485 4486 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4487 4488 def _parse_auto_increment( 4489 self, 4490 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4491 start = None 4492 increment = None 4493 4494 if self._match(TokenType.L_PAREN, advance=False): 4495 args = self._parse_wrapped_csv(self._parse_bitwise) 4496 start = seq_get(args, 0) 4497 increment = seq_get(args, 1) 4498 elif self._match_text_seq("START"): 4499 start = self._parse_bitwise() 4500 self._match_text_seq("INCREMENT") 4501 increment = self._parse_bitwise() 4502 4503 if start and increment: 4504 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4505 4506 return exp.AutoIncrementColumnConstraint() 4507 4508 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4509 if not self._match_text_seq("REFRESH"): 4510 self._retreat(self._index - 1) 4511 return None 4512 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4513 4514 def _parse_compress(self) -> exp.CompressColumnConstraint: 4515 if self._match(TokenType.L_PAREN, advance=False): 4516 return self.expression( 4517 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4518 ) 4519 4520 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4521 4522 def 
    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse a GENERATED {BY DEFAULT | ALWAYS} AS ... column constraint.

        Covers the identity form (with its parenthesized START/INCREMENT/
        MINVALUE/MAXVALUE/CYCLE options), the `AS ROW START|END [HIDDEN]`
        form, and the computed-expression form (`GENERATED ALWAYS AS (expr)`).
        """
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            # ALWAYS is optional here; absence is treated the same way
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            # System-versioning form: AS ROW START | AS ROW END [HIDDEN]
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ALWAYS AS (expr) — the parens hold an expression
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # IDENTITY(start, increment) shorthand with bare numbers
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this
this=self._parse_bitwise()) 4578 4579 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4580 if self._match_text_seq("NULL"): 4581 return self.expression(exp.NotNullColumnConstraint) 4582 if self._match_text_seq("CASESPECIFIC"): 4583 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4584 if self._match_text_seq("FOR", "REPLICATION"): 4585 return self.expression(exp.NotForReplicationColumnConstraint) 4586 return None 4587 4588 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4589 if self._match(TokenType.CONSTRAINT): 4590 this = self._parse_id_var() 4591 else: 4592 this = None 4593 4594 if self._match_texts(self.CONSTRAINT_PARSERS): 4595 return self.expression( 4596 exp.ColumnConstraint, 4597 this=this, 4598 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4599 ) 4600 4601 return this 4602 4603 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4604 if not self._match(TokenType.CONSTRAINT): 4605 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4606 4607 return self.expression( 4608 exp.Constraint, 4609 this=self._parse_id_var(), 4610 expressions=self._parse_unnamed_constraints(), 4611 ) 4612 4613 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 4614 constraints = [] 4615 while True: 4616 constraint = self._parse_unnamed_constraint() or self._parse_function() 4617 if not constraint: 4618 break 4619 constraints.append(constraint) 4620 4621 return constraints 4622 4623 def _parse_unnamed_constraint( 4624 self, constraints: t.Optional[t.Collection[str]] = None 4625 ) -> t.Optional[exp.Expression]: 4626 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4627 constraints or self.CONSTRAINT_PARSERS 4628 ): 4629 return None 4630 4631 constraint = self._prev.text.upper() 4632 if constraint not in self.CONSTRAINT_PARSERS: 4633 self.raise_error(f"No parser found for schema constraint {constraint}.") 4634 4635 return 
    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options as plain strings.

        Recognizes `ON <event> <action>` clauses plus a fixed set of keyword
        options (NOT ENFORCED, DEFERRABLE, ...); stops at the first token that
        matches none of them.
        """
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token right after ON names the event (e.g. DELETE/UPDATE)
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options
    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse a FOREIGN KEY constraint body: the wrapped column list, an
        optional REFERENCES clause, and any `ON DELETE|UPDATE <action>` rules.
        """
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            # e.g. "delete" / "update" — becomes a ForeignKey kwarg name
            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Fall back to taking the next token verbatim as the action
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a `[...]` or `{...}` suffix on `this`, recursing to consume
        chained brackets (e.g. `x[1][2]`).

        `{...}` produces a Struct, a bare `[...]` (or one following the ARRAY
        keyword) produces an Array, and `expr[...]` produces an index/Bracket
        access with the dialect's index offset applied.
        """
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize indices to the canonical offset before building Bracket
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)
    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse an IF expression, either as a function call `IF(...)` or as
        the keyword form `IF <cond> THEN <expr> [ELSE <expr>] END`.

        Returns None (after rewinding) when the keyword form has no condition.
        """
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            # IF was already consumed by the caller, hence the -1
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # A statement-initial bare IF is a command in some dialects
                return self._parse_as_command(self._prev)

            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this
    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the interior of CAST(<expr> AS <type> [FORMAT <fmt>]).

        Args:
            strict: if True produce exp.Cast, otherwise exp.TryCast.
            safe: forwarded as the resulting expression's `safe` arg (dialect-specific).
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Comma variant, e.g. CAST(x, 'type') -> CastToStrType
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                # A FORMAT clause on a temporal cast is rewritten into StrToDate /
                # StrToTime with the format string translated to the generic notation.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                # Preserve an AT TIME ZONE that wrapped the format string.
                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unrecognized type name: treat as a user-defined type.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )
    def _parse_string_agg(self) -> exp.Expression:
        # Parses STRING_AGG / GROUP_CONCAT style calls (the opening paren was
        # already consumed by the caller), including the ANSI
        # WITHIN GROUP (ORDER BY ...) form.
        if self._match(TokenType.DISTINCT):
            # DISTINCT applies to the first argument only.
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            # Rewind past the consumed R_PAREN so the caller can close the call.
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
4967 """ 4968 args = self._parse_csv(self._parse_conjunction) 4969 4970 if len(args) < 3: 4971 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4972 4973 expression, *expressions = args 4974 if not expression: 4975 return None 4976 4977 ifs = [] 4978 for search, result in zip(expressions[::2], expressions[1::2]): 4979 if not search or not result: 4980 return None 4981 4982 if isinstance(search, exp.Literal): 4983 ifs.append( 4984 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4985 ) 4986 elif isinstance(search, exp.Null): 4987 ifs.append( 4988 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4989 ) 4990 else: 4991 cond = exp.or_( 4992 exp.EQ(this=expression.copy(), expression=search), 4993 exp.and_( 4994 exp.Is(this=expression.copy(), expression=exp.Null()), 4995 exp.Is(this=search.copy(), expression=exp.Null()), 4996 copy=False, 4997 ), 4998 copy=False, 4999 ) 5000 ifs.append(exp.If(this=cond, true=result)) 5001 5002 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5003 5004 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5005 self._match_text_seq("KEY") 5006 key = self._parse_column() 5007 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5008 self._match_text_seq("VALUE") 5009 value = self._parse_bitwise() 5010 5011 if not key and not value: 5012 return None 5013 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5014 5015 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5016 if not this or not self._match_text_seq("FORMAT", "JSON"): 5017 return this 5018 5019 return self.expression(exp.FormatJson, this=this) 5020 5021 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5022 # Parses the "X ON Y" syntax, i.e. 
    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        # Parses JSON_OBJECT(...) / JSON_OBJECTAGG(...): either a single star or a
        # list of [KEY] k VALUE v pairs, followed by the optional standard clauses.
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        # {NULL | ABSENT} ON NULL
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        # {WITH | WITHOUT} UNIQUE [KEYS]
        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        # RETURNING <type> [FORMAT JSON]
        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )
5089 self._match_text_seq("COLUMNS") 5090 return self.expression( 5091 exp.JSONSchema, 5092 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5093 ) 5094 5095 def _parse_json_table(self) -> exp.JSONTable: 5096 this = self._parse_format_json(self._parse_bitwise()) 5097 path = self._match(TokenType.COMMA) and self._parse_string() 5098 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5099 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5100 schema = self._parse_json_schema() 5101 5102 return exp.JSONTable( 5103 this=this, 5104 schema=schema, 5105 path=path, 5106 error_handling=error_handling, 5107 empty_handling=empty_handling, 5108 ) 5109 5110 def _parse_match_against(self) -> exp.MatchAgainst: 5111 expressions = self._parse_csv(self._parse_column) 5112 5113 self._match_text_seq(")", "AGAINST", "(") 5114 5115 this = self._parse_string() 5116 5117 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5118 modifier = "IN NATURAL LANGUAGE MODE" 5119 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5120 modifier = f"{modifier} WITH QUERY EXPANSION" 5121 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5122 modifier = "IN BOOLEAN MODE" 5123 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5124 modifier = "WITH QUERY EXPANSION" 5125 else: 5126 modifier = None 5127 5128 return self.expression( 5129 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5130 ) 5131 5132 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5133 def _parse_open_json(self) -> exp.OpenJSON: 5134 this = self._parse_bitwise() 5135 path = self._match(TokenType.COMMA) and self._parse_string() 5136 5137 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5138 this = self._parse_field(any_token=True) 5139 kind = self._parse_types() 5140 path = self._parse_string() 5141 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5142 5143 
return self.expression( 5144 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5145 ) 5146 5147 expressions = None 5148 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5149 self._match_l_paren() 5150 expressions = self._parse_csv(_parse_open_json_column_def) 5151 5152 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5153 5154 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5155 args = self._parse_csv(self._parse_bitwise) 5156 5157 if self._match(TokenType.IN): 5158 return self.expression( 5159 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5160 ) 5161 5162 if haystack_first: 5163 haystack = seq_get(args, 0) 5164 needle = seq_get(args, 1) 5165 else: 5166 needle = seq_get(args, 0) 5167 haystack = seq_get(args, 1) 5168 5169 return self.expression( 5170 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5171 ) 5172 5173 def _parse_predict(self) -> exp.Predict: 5174 self._match_text_seq("MODEL") 5175 this = self._parse_table() 5176 5177 self._match(TokenType.COMMA) 5178 self._match_text_seq("TABLE") 5179 5180 return self.expression( 5181 exp.Predict, 5182 this=this, 5183 expression=self._parse_table(), 5184 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5185 ) 5186 5187 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5188 args = self._parse_csv(self._parse_table) 5189 return exp.JoinHint(this=func_name.upper(), expressions=args) 5190 5191 def _parse_substring(self) -> exp.Substring: 5192 # Postgres supports the form: substring(string [from int] [for int]) 5193 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5194 5195 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5196 5197 if self._match(TokenType.FROM): 5198 args.append(self._parse_bitwise()) 5199 if self._match(TokenType.FOR): 5200 args.append(self._parse_bitwise()) 5201 5202 return 
    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
        # Parses TRIM([{BOTH | LEADING | TRAILING}] [chars FROM] string [COLLATE c])
        # as well as the comma-separated TRIM(string, chars) variant.

        position = None
        collation = None
        expression = None

        # Optional BOTH / LEADING / TRAILING qualifier.
        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # In `TRIM(chars FROM string)` the operand parsed first is the pattern,
            # so the two operands must be swapped (likewise for TRIM_PATTERN_FIRST
            # dialects using the comma form).
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the clauses that may trail a function call: FILTER (...),
        WITHIN GROUP (...), IGNORE/RESPECT NULLS and OVER (...).

        Args:
            this: the function expression the clauses attach to.
            alias: True when parsing a named window (WINDOW w AS (...)).
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Hoist the IGNORE/RESPECT NULLS wrapper out of the aggregate's argument
                # so it wraps the aggregate itself.
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <window-name> form: reference to a named window, no inline spec.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # ROWS/RANGE BETWEEN <start spec> AND <end spec>
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window
identifier 5413 5414 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 5415 quoted = self._prev.token_type == TokenType.STRING 5416 return exp.Identifier(this=self._prev.text, quoted=quoted) 5417 5418 return None 5419 5420 def _parse_string(self) -> t.Optional[exp.Expression]: 5421 if self._match_set(self.STRING_PARSERS): 5422 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5423 return self._parse_placeholder() 5424 5425 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5426 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5427 5428 def _parse_number(self) -> t.Optional[exp.Expression]: 5429 if self._match_set(self.NUMERIC_PARSERS): 5430 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5431 return self._parse_placeholder() 5432 5433 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5434 if self._match(TokenType.IDENTIFIER): 5435 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5436 return self._parse_placeholder() 5437 5438 def _parse_var( 5439 self, 5440 any_token: bool = False, 5441 tokens: t.Optional[t.Collection[TokenType]] = None, 5442 upper: bool = False, 5443 ) -> t.Optional[exp.Expression]: 5444 if ( 5445 (any_token and self._advance_any()) 5446 or self._match(TokenType.VAR) 5447 or (self._match_set(tokens) if tokens else False) 5448 ): 5449 return self.expression( 5450 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5451 ) 5452 return self._parse_placeholder() 5453 5454 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5455 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5456 self._advance() 5457 return self._prev 5458 return None 5459 5460 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5461 return self._parse_var() or self._parse_string() 5462 5463 def _parse_primary_or_var(self) -> 
t.Optional[exp.Expression]: 5464 return self._parse_primary() or self._parse_var(any_token=True) 5465 5466 def _parse_null(self) -> t.Optional[exp.Expression]: 5467 if self._match_set(self.NULL_TOKENS): 5468 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5469 return self._parse_placeholder() 5470 5471 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5472 if self._match(TokenType.TRUE): 5473 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5474 if self._match(TokenType.FALSE): 5475 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5476 return self._parse_placeholder() 5477 5478 def _parse_star(self) -> t.Optional[exp.Expression]: 5479 if self._match(TokenType.STAR): 5480 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5481 return self._parse_placeholder() 5482 5483 def _parse_parameter(self) -> exp.Parameter: 5484 self._match(TokenType.L_BRACE) 5485 this = self._parse_identifier() or self._parse_primary_or_var() 5486 expression = self._match(TokenType.COLON) and ( 5487 self._parse_identifier() or self._parse_primary_or_var() 5488 ) 5489 self._match(TokenType.R_BRACE) 5490 return self.expression(exp.Parameter, this=this, expression=expression) 5491 5492 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5493 if self._match_set(self.PLACEHOLDER_PARSERS): 5494 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5495 if placeholder: 5496 return placeholder 5497 self._advance(-1) 5498 return None 5499 5500 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5501 if not self._match(TokenType.EXCEPT): 5502 return None 5503 if self._match(TokenType.L_PAREN, advance=False): 5504 return self._parse_wrapped_csv(self._parse_column) 5505 5506 except_column = self._parse_column() 5507 return [except_column] if except_column else None 5508 5509 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5510 if not self._match(TokenType.REPLACE): 5511 return None 5512 if 
self._match(TokenType.L_PAREN, advance=False): 5513 return self._parse_wrapped_csv(self._parse_expression) 5514 5515 replace_expression = self._parse_expression() 5516 return [replace_expression] if replace_expression else None 5517 5518 def _parse_csv( 5519 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5520 ) -> t.List[exp.Expression]: 5521 parse_result = parse_method() 5522 items = [parse_result] if parse_result is not None else [] 5523 5524 while self._match(sep): 5525 self._add_comments(parse_result) 5526 parse_result = parse_method() 5527 if parse_result is not None: 5528 items.append(parse_result) 5529 5530 return items 5531 5532 def _parse_tokens( 5533 self, parse_method: t.Callable, expressions: t.Dict 5534 ) -> t.Optional[exp.Expression]: 5535 this = parse_method() 5536 5537 while self._match_set(expressions): 5538 this = self.expression( 5539 expressions[self._prev.token_type], 5540 this=this, 5541 comments=self._prev_comments, 5542 expression=parse_method(), 5543 ) 5544 5545 return this 5546 5547 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5548 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5549 5550 def _parse_wrapped_csv( 5551 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5552 ) -> t.List[exp.Expression]: 5553 return self._parse_wrapped( 5554 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5555 ) 5556 5557 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5558 wrapped = self._match(TokenType.L_PAREN) 5559 if not wrapped and not optional: 5560 self.raise_error("Expecting (") 5561 parse_result = parse_method() 5562 if wrapped: 5563 self._match_r_paren() 5564 return parse_result 5565 5566 def _parse_expressions(self) -> t.List[exp.Expression]: 5567 return self._parse_csv(self._parse_expression) 5568 5569 def _parse_select_or_expression(self, alias: bool = False) -> 
    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        # Parses COMMIT / ROLLBACK [TRANSACTION | WORK] [TO [SAVEPOINT] name]
        # [AND [NO] CHAIN]; the COMMIT/ROLLBACK token was consumed by the caller.
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            # AND CHAIN -> True, AND NO CHAIN -> False
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            # NOTE(review): a parsed AND [NO] CHAIN is not carried onto Rollback —
            # only the savepoint is preserved; presumably intentional, but confirm.
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)
expression.set("exists", exists_column) 5633 5634 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5635 if self._match_texts(("FIRST", "AFTER")): 5636 position = self._prev.text 5637 column_position = self.expression( 5638 exp.ColumnPosition, this=self._parse_column(), position=position 5639 ) 5640 expression.set("position", column_position) 5641 5642 return expression 5643 5644 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5645 drop = self._match(TokenType.DROP) and self._parse_drop() 5646 if drop and not isinstance(drop, exp.Command): 5647 drop.set("kind", drop.args.get("kind", "COLUMN")) 5648 return drop 5649 5650 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5651 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5652 return self.expression( 5653 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5654 ) 5655 5656 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5657 index = self._index - 1 5658 5659 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5660 return self._parse_csv( 5661 lambda: self.expression( 5662 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5663 ) 5664 ) 5665 5666 self._retreat(index) 5667 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5668 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5669 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5670 5671 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5672 self._match(TokenType.COLUMN) 5673 column = self._parse_field(any_token=True) 5674 5675 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5676 return self.expression(exp.AlterColumn, this=column, drop=True) 5677 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5678 return self.expression(exp.AlterColumn, this=column, 
            # NOTE(review): continuation of a method whose `def` line precedes this
            # chunk — closes a `self.expression(exp.AlterColumn, ...)` call that
            # carries a parsed DEFAULT value.
            default=self._parse_conjunction())
        if self._match(TokenType.COMMENT):
            # ALTER COLUMN ... COMMENT '<string>'
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())

        # ALTER COLUMN ... [SET DATA] TYPE <type> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            # each keyword is optional; `and` short-circuits to None when absent
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse the action list of an ALTER TABLE ... DROP clause.

        Returns a list of drop-partition expressions when a PARTITION keyword
        follows, otherwise a list of dropped columns.
        """
        # Remember the position of the DROP token so we can rewind.
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        # Not a partition drop: rewind past the optional IF EXISTS and parse columns.
        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        """Parse ALTER TABLE ... RENAME [COLUMN <old> TO <new> | TO <table>].

        Returns None for a malformed column rename (missing old name, TO, or
        new name) so the caller can fall back to a generic command.
        """
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse an ALTER statement, dispatching on the action keyword.

        Falls back to an opaque exp.Command when the statement is not
        ALTER TABLE or the action is not fully understood.
        """
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        # Dispatch on the action keyword (ADD, DROP, ALTER, RENAME, ...).
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            options = self._parse_csv(self._parse_property)

            # Only build an AlterTable if every token was consumed; otherwise
            # preserve the raw SQL as a Command.
            if not self._curr and actions:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO <target> USING <source> ON <condition> WHEN ..."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse the WHEN [NOT] MATCHED [BY TARGET|SOURCE] clauses of a MERGE."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is False for BY TARGET, truthy for BY SOURCE, None when omitted.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT * (e.g. Databricks)
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    # INSERT (cols) VALUES (vals)
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE *
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    # UPDATE SET col = expr, ...
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via the dialect's registered SHOW parsers."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        # Unknown SHOW form: keep the raw SQL as a Command.
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a single SET assignment (`name = value` or `name TO value`).

        Args:
            kind: Optional qualifier such as "GLOBAL" or "SESSION".
        """
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        # Bail out (and rewind) when there's no assignable target, or the
        # dialect requires an explicit delimiter and none was found.
        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristics>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one item of a SET statement, preferring dialect-specific parsers."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET/UNSET statement; falls back to a Command on leftovers."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        # Tokens remain unconsumed: rewind and preserve the raw SQL instead.
        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        """Parse a (possibly multi-word) option keyword into an exp.Var.

        Args:
            options: Map of leading keyword -> allowed continuation sequences.
            raise_unmatched: Whether to raise when the continuation is unknown.
        """
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            # No continuation matched. If one was required (continuations is a
            # non-empty sequence) or the leading keyword itself is unknown
            # (continuations is None), this is an error or a rewind.
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the rest of the statement verbatim into an exp.Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        # Split into the leading keyword and everything after it.
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property clause, e.g. ClickHouse SOURCE(...)/LAYOUT(...)."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            # Nested key/value settings, e.g. SOURCE(CLICKHOUSE(host '...' port 9000)).
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a RANGE(MIN <x> MAX <y>) (or RANGE(MAX <y>)) clause.

        When MIN is omitted, it defaults to literal 0.
        """
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            # NOTE(review): `min`/`max` shadow the builtins; scoped to this method.
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        """Parse `<expr> FOR <x> IN <iterable> [IF <condition>]` comprehensions."""
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            # Not a comprehension: rewind (including the FOR token) and give up.
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        """Parse a dollar-quoted heredoc string, e.g. $$...$$ or $tag$...$tag$."""
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        # The opening delimiter must be contiguous tokens ($, optional tag, $).
        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        # Scan forward until the same delimiter sequence reappears.
        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        # Reachable when error_level is not IMMEDIATE (raise_error only records).
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Find a parser keyed by the longest matching keyword sequence.

        Walks the trie token by token; rewinds and returns None on no match.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        # Return True (and by default advance) if the current token matches
        # `token_type`; attach any buffered comments to `expression`.
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        # Like _match, but accepts any token type in the collection `types`.
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        # Match two consecutive token types atomically (both or neither).
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        # Require an opening parenthesis; error otherwise.
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        # Require a closing parenthesis; error otherwise.
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        # Case-insensitively match the current token's text against `texts`.
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        # Match a whole sequence of keyword texts atomically; rewinds on the
        # first mismatch so partial matches consume nothing.
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Rewrite columns that reference lambda parameters into bare identifiers.

        Columns rooted at a name in `lambda_variables` are replaced by a Dot
        chain (when qualified) or their inner identifier; the topmost Dot of a
        chain is replaced so nested attribute access is preserved.
        """
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        # Replace the outermost Dot of the chain.
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    # No enclosing Dot chain: replace the column itself (or the
                    # whole node when the column *is* the node).
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        """Parse TRUNCATE [TABLE|DATABASE] with its optional clauses."""
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        # Postgres: RESTART IDENTITY | CONTINUE IDENTITY
        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        """Parse an ordered expression optionally followed by `WITH <operator>`."""
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1115 def __init__( 1116 self, 1117 error_level: t.Optional[ErrorLevel] = None, 1118 error_message_context: int = 100, 1119 max_errors: int = 3, 1120 dialect: DialectType = None, 1121 ): 1122 from sqlglot.dialects import Dialect 1123 1124 self.error_level = error_level or ErrorLevel.IMMEDIATE 1125 self.error_message_context = error_message_context 1126 self.max_errors = max_errors 1127 self.dialect = Dialect.get_or_raise(dialect) 1128 self.reset()
1140 def parse( 1141 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1142 ) -> t.List[t.Optional[exp.Expression]]: 1143 """ 1144 Parses a list of tokens and returns a list of syntax trees, one tree 1145 per parsed SQL statement. 1146 1147 Args: 1148 raw_tokens: The list of tokens. 1149 sql: The original SQL string, used to produce helpful debug messages. 1150 1151 Returns: 1152 The list of the produced syntax trees. 1153 """ 1154 return self._parse( 1155 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1156 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1158 def parse_into( 1159 self, 1160 expression_types: exp.IntoType, 1161 raw_tokens: t.List[Token], 1162 sql: t.Optional[str] = None, 1163 ) -> t.List[t.Optional[exp.Expression]]: 1164 """ 1165 Parses a list of tokens into a given Expression type. If a collection of Expression 1166 types is given instead, this method will try to parse the token list into each one 1167 of them, stopping at the first for which the parsing succeeds. 1168 1169 Args: 1170 expression_types: The expression type(s) to try and parse the token list into. 1171 raw_tokens: The list of tokens. 1172 sql: The original SQL string, used to produce helpful debug messages. 1173 1174 Returns: 1175 The target Expression. 1176 """ 1177 errors = [] 1178 for expression_type in ensure_list(expression_types): 1179 parser = self.EXPRESSION_PARSERS.get(expression_type) 1180 if not parser: 1181 raise TypeError(f"No parser registered for {expression_type}") 1182 1183 try: 1184 return self._parse(parser, raw_tokens, sql) 1185 except ParseError as e: 1186 e.errors[0]["into_expression"] = expression_type 1187 errors.append(e) 1188 1189 raise ParseError( 1190 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1191 errors=merge_errors(errors), 1192 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1229 def check_errors(self) -> None: 1230 """Logs or raises any found errors, depending on the chosen error level setting.""" 1231 if self.error_level == ErrorLevel.WARN: 1232 for error in self.errors: 1233 logger.error(str(error)) 1234 elif self.error_level == ErrorLevel.RAISE and self.errors: 1235 raise ParseError( 1236 concat_messages(self.errors, self.max_errors), 1237 errors=merge_errors(self.errors), 1238 )
Logs or raises any found errors, depending on the chosen error level setting.
1240 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1241 """ 1242 Appends an error in the list of recorded errors or raises it, depending on the chosen 1243 error level setting. 1244 """ 1245 token = token or self._curr or self._prev or Token.string("") 1246 start = token.start 1247 end = token.end + 1 1248 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1249 highlight = self.sql[start:end] 1250 end_context = self.sql[end : end + self.error_message_context] 1251 1252 error = ParseError.new( 1253 f"{message}. Line {token.line}, Col: {token.col}.\n" 1254 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1255 description=message, 1256 line=token.line, 1257 col=token.col, 1258 start_context=start_context, 1259 highlight=highlight, 1260 end_context=end_context, 1261 ) 1262 1263 if self.error_level == ErrorLevel.IMMEDIATE: 1264 raise error 1265 1266 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1268 def expression( 1269 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1270 ) -> E: 1271 """ 1272 Creates a new, validated Expression. 1273 1274 Args: 1275 exp_class: The expression class to instantiate. 1276 comments: An optional list of comments to attach to the expression. 1277 kwargs: The arguments to set for the expression along with their respective values. 1278 1279 Returns: 1280 The target expression. 1281 """ 1282 instance = exp_class(**kwargs) 1283 instance.add_comments(comments) if comments else self._add_comments(instance) 1284 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1291 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1292 """ 1293 Validates an Expression, making sure that all its mandatory arguments are set. 1294 1295 Args: 1296 expression: The expression to validate. 1297 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1298 1299 Returns: 1300 The validated expression. 1301 """ 1302 if self.error_level != ErrorLevel.IGNORE: 1303 for error_message in expression.error_messages(args): 1304 self.raise_error(error_message) 1305 1306 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.