# Module: sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E, Lit 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18logger = logging.getLogger("sqlglot") 19 20OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 21 22 23def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 24 if len(args) == 1 and args[0].is_star: 25 return exp.StarMap(this=args[0]) 26 27 keys = [] 28 values = [] 29 for i in range(0, len(args), 2): 30 keys.append(args[i]) 31 values.append(args[i + 1]) 32 33 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 34 35 36def build_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 50 # Default argument order is base, expression 51 this = seq_get(args, 0) 52 expression = seq_get(args, 1) 53 54 if expression: 55 if not dialect.LOG_BASE_FIRST: 56 this, expression = expression, this 57 return exp.Log(this=this, expression=expression) 58 59 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 60 61 62def 
build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 63 def _builder(args: t.List, dialect: Dialect) -> E: 64 expression = expr_type( 65 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 66 ) 67 if len(args) > 2 and expr_type is exp.JSONExtract: 68 expression.set("expressions", args[2:]) 69 70 return expression 71 72 return _builder 73 74 75class _Parser(type): 76 def __new__(cls, clsname, bases, attrs): 77 klass = super().__new__(cls, clsname, bases, attrs) 78 79 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 80 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 81 82 return klass 83 84 85class Parser(metaclass=_Parser): 86 """ 87 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 88 89 Args: 90 error_level: The desired error level. 91 Default: ErrorLevel.IMMEDIATE 92 error_message_context: The amount of context to capture from a query string when displaying 93 the error message (in number of characters). 94 Default: 100 95 max_errors: Maximum number of error messages to include in a raised ParseError. 96 This is only relevant if error_level is ErrorLevel.RAISE. 
97 Default: 3 98 """ 99 100 FUNCTIONS: t.Dict[str, t.Callable] = { 101 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 102 "CONCAT": lambda args, dialect: exp.Concat( 103 expressions=args, 104 safe=not dialect.STRICT_STRING_CONCAT, 105 coalesce=dialect.CONCAT_COALESCE, 106 ), 107 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 108 expressions=args, 109 safe=not dialect.STRICT_STRING_CONCAT, 110 coalesce=dialect.CONCAT_COALESCE, 111 ), 112 "DATE_TO_DATE_STR": lambda args: exp.Cast( 113 this=seq_get(args, 0), 114 to=exp.DataType(this=exp.DataType.Type.TEXT), 115 ), 116 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 117 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 118 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 119 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 120 "LIKE": build_like, 121 "LOG": build_logarithm, 122 "TIME_TO_TIME_STR": lambda args: exp.Cast( 123 this=seq_get(args, 0), 124 to=exp.DataType(this=exp.DataType.Type.TEXT), 125 ), 126 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 127 this=exp.Cast( 128 this=seq_get(args, 0), 129 to=exp.DataType(this=exp.DataType.Type.TEXT), 130 ), 131 start=exp.Literal.number(1), 132 length=exp.Literal.number(10), 133 ), 134 "VAR_MAP": build_var_map, 135 } 136 137 NO_PAREN_FUNCTIONS = { 138 TokenType.CURRENT_DATE: exp.CurrentDate, 139 TokenType.CURRENT_DATETIME: exp.CurrentDate, 140 TokenType.CURRENT_TIME: exp.CurrentTime, 141 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 142 TokenType.CURRENT_USER: exp.CurrentUser, 143 } 144 145 STRUCT_TYPE_TOKENS = { 146 TokenType.NESTED, 147 TokenType.STRUCT, 148 } 149 150 NESTED_TYPE_TOKENS = { 151 TokenType.ARRAY, 152 TokenType.LOWCARDINALITY, 153 TokenType.MAP, 154 TokenType.NULLABLE, 155 *STRUCT_TYPE_TOKENS, 156 } 157 158 ENUM_TYPE_TOKENS = { 159 TokenType.ENUM, 160 TokenType.ENUM8, 161 TokenType.ENUM16, 162 } 163 164 
AGGREGATE_TYPE_TOKENS = { 165 TokenType.AGGREGATEFUNCTION, 166 TokenType.SIMPLEAGGREGATEFUNCTION, 167 } 168 169 TYPE_TOKENS = { 170 TokenType.BIT, 171 TokenType.BOOLEAN, 172 TokenType.TINYINT, 173 TokenType.UTINYINT, 174 TokenType.SMALLINT, 175 TokenType.USMALLINT, 176 TokenType.INT, 177 TokenType.UINT, 178 TokenType.BIGINT, 179 TokenType.UBIGINT, 180 TokenType.INT128, 181 TokenType.UINT128, 182 TokenType.INT256, 183 TokenType.UINT256, 184 TokenType.MEDIUMINT, 185 TokenType.UMEDIUMINT, 186 TokenType.FIXEDSTRING, 187 TokenType.FLOAT, 188 TokenType.DOUBLE, 189 TokenType.CHAR, 190 TokenType.NCHAR, 191 TokenType.VARCHAR, 192 TokenType.NVARCHAR, 193 TokenType.BPCHAR, 194 TokenType.TEXT, 195 TokenType.MEDIUMTEXT, 196 TokenType.LONGTEXT, 197 TokenType.MEDIUMBLOB, 198 TokenType.LONGBLOB, 199 TokenType.BINARY, 200 TokenType.VARBINARY, 201 TokenType.JSON, 202 TokenType.JSONB, 203 TokenType.INTERVAL, 204 TokenType.TINYBLOB, 205 TokenType.TINYTEXT, 206 TokenType.TIME, 207 TokenType.TIMETZ, 208 TokenType.TIMESTAMP, 209 TokenType.TIMESTAMP_S, 210 TokenType.TIMESTAMP_MS, 211 TokenType.TIMESTAMP_NS, 212 TokenType.TIMESTAMPTZ, 213 TokenType.TIMESTAMPLTZ, 214 TokenType.DATETIME, 215 TokenType.DATETIME64, 216 TokenType.DATE, 217 TokenType.DATE32, 218 TokenType.INT4RANGE, 219 TokenType.INT4MULTIRANGE, 220 TokenType.INT8RANGE, 221 TokenType.INT8MULTIRANGE, 222 TokenType.NUMRANGE, 223 TokenType.NUMMULTIRANGE, 224 TokenType.TSRANGE, 225 TokenType.TSMULTIRANGE, 226 TokenType.TSTZRANGE, 227 TokenType.TSTZMULTIRANGE, 228 TokenType.DATERANGE, 229 TokenType.DATEMULTIRANGE, 230 TokenType.DECIMAL, 231 TokenType.UDECIMAL, 232 TokenType.BIGDECIMAL, 233 TokenType.UUID, 234 TokenType.GEOGRAPHY, 235 TokenType.GEOMETRY, 236 TokenType.HLLSKETCH, 237 TokenType.HSTORE, 238 TokenType.PSEUDO_TYPE, 239 TokenType.SUPER, 240 TokenType.SERIAL, 241 TokenType.SMALLSERIAL, 242 TokenType.BIGSERIAL, 243 TokenType.XML, 244 TokenType.YEAR, 245 TokenType.UNIQUEIDENTIFIER, 246 TokenType.USERDEFINED, 247 
TokenType.MONEY, 248 TokenType.SMALLMONEY, 249 TokenType.ROWVERSION, 250 TokenType.IMAGE, 251 TokenType.VARIANT, 252 TokenType.OBJECT, 253 TokenType.OBJECT_IDENTIFIER, 254 TokenType.INET, 255 TokenType.IPADDRESS, 256 TokenType.IPPREFIX, 257 TokenType.IPV4, 258 TokenType.IPV6, 259 TokenType.UNKNOWN, 260 TokenType.NULL, 261 *ENUM_TYPE_TOKENS, 262 *NESTED_TYPE_TOKENS, 263 *AGGREGATE_TYPE_TOKENS, 264 } 265 266 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 267 TokenType.BIGINT: TokenType.UBIGINT, 268 TokenType.INT: TokenType.UINT, 269 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 270 TokenType.SMALLINT: TokenType.USMALLINT, 271 TokenType.TINYINT: TokenType.UTINYINT, 272 TokenType.DECIMAL: TokenType.UDECIMAL, 273 } 274 275 SUBQUERY_PREDICATES = { 276 TokenType.ANY: exp.Any, 277 TokenType.ALL: exp.All, 278 TokenType.EXISTS: exp.Exists, 279 TokenType.SOME: exp.Any, 280 } 281 282 RESERVED_TOKENS = { 283 *Tokenizer.SINGLE_TOKENS.values(), 284 TokenType.SELECT, 285 } 286 287 DB_CREATABLES = { 288 TokenType.DATABASE, 289 TokenType.SCHEMA, 290 TokenType.TABLE, 291 TokenType.VIEW, 292 TokenType.MODEL, 293 TokenType.DICTIONARY, 294 TokenType.SEQUENCE, 295 TokenType.STORAGE_INTEGRATION, 296 } 297 298 CREATABLES = { 299 TokenType.COLUMN, 300 TokenType.CONSTRAINT, 301 TokenType.FUNCTION, 302 TokenType.INDEX, 303 TokenType.PROCEDURE, 304 TokenType.FOREIGN_KEY, 305 *DB_CREATABLES, 306 } 307 308 # Tokens that can represent identifiers 309 ID_VAR_TOKENS = { 310 TokenType.VAR, 311 TokenType.ANTI, 312 TokenType.APPLY, 313 TokenType.ASC, 314 TokenType.AUTO_INCREMENT, 315 TokenType.BEGIN, 316 TokenType.BPCHAR, 317 TokenType.CACHE, 318 TokenType.CASE, 319 TokenType.COLLATE, 320 TokenType.COMMAND, 321 TokenType.COMMENT, 322 TokenType.COMMIT, 323 TokenType.CONSTRAINT, 324 TokenType.DEFAULT, 325 TokenType.DELETE, 326 TokenType.DESC, 327 TokenType.DESCRIBE, 328 TokenType.DICTIONARY, 329 TokenType.DIV, 330 TokenType.END, 331 TokenType.EXECUTE, 332 TokenType.ESCAPE, 333 TokenType.FALSE, 334 TokenType.FIRST, 335 
TokenType.FILTER, 336 TokenType.FINAL, 337 TokenType.FORMAT, 338 TokenType.FULL, 339 TokenType.IS, 340 TokenType.ISNULL, 341 TokenType.INTERVAL, 342 TokenType.KEEP, 343 TokenType.KILL, 344 TokenType.LEFT, 345 TokenType.LOAD, 346 TokenType.MERGE, 347 TokenType.NATURAL, 348 TokenType.NEXT, 349 TokenType.OFFSET, 350 TokenType.OPERATOR, 351 TokenType.ORDINALITY, 352 TokenType.OVERLAPS, 353 TokenType.OVERWRITE, 354 TokenType.PARTITION, 355 TokenType.PERCENT, 356 TokenType.PIVOT, 357 TokenType.PRAGMA, 358 TokenType.RANGE, 359 TokenType.RECURSIVE, 360 TokenType.REFERENCES, 361 TokenType.REFRESH, 362 TokenType.REPLACE, 363 TokenType.RIGHT, 364 TokenType.ROW, 365 TokenType.ROWS, 366 TokenType.SEMI, 367 TokenType.SET, 368 TokenType.SETTINGS, 369 TokenType.SHOW, 370 TokenType.TEMPORARY, 371 TokenType.TOP, 372 TokenType.TRUE, 373 TokenType.TRUNCATE, 374 TokenType.UNIQUE, 375 TokenType.UNPIVOT, 376 TokenType.UPDATE, 377 TokenType.USE, 378 TokenType.VOLATILE, 379 TokenType.WINDOW, 380 *CREATABLES, 381 *SUBQUERY_PREDICATES, 382 *TYPE_TOKENS, 383 *NO_PAREN_FUNCTIONS, 384 } 385 386 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 387 388 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 389 TokenType.ANTI, 390 TokenType.APPLY, 391 TokenType.ASOF, 392 TokenType.FULL, 393 TokenType.LEFT, 394 TokenType.LOCK, 395 TokenType.NATURAL, 396 TokenType.OFFSET, 397 TokenType.RIGHT, 398 TokenType.SEMI, 399 TokenType.WINDOW, 400 } 401 402 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 403 404 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 405 406 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 407 408 FUNC_TOKENS = { 409 TokenType.COLLATE, 410 TokenType.COMMAND, 411 TokenType.CURRENT_DATE, 412 TokenType.CURRENT_DATETIME, 413 TokenType.CURRENT_TIMESTAMP, 414 TokenType.CURRENT_TIME, 415 TokenType.CURRENT_USER, 416 TokenType.FILTER, 417 TokenType.FIRST, 418 TokenType.FORMAT, 419 TokenType.GLOB, 420 TokenType.IDENTIFIER, 421 TokenType.INDEX, 422 TokenType.ISNULL, 423 TokenType.ILIKE, 
424 TokenType.INSERT, 425 TokenType.LIKE, 426 TokenType.MERGE, 427 TokenType.OFFSET, 428 TokenType.PRIMARY_KEY, 429 TokenType.RANGE, 430 TokenType.REPLACE, 431 TokenType.RLIKE, 432 TokenType.ROW, 433 TokenType.UNNEST, 434 TokenType.VAR, 435 TokenType.LEFT, 436 TokenType.RIGHT, 437 TokenType.SEQUENCE, 438 TokenType.DATE, 439 TokenType.DATETIME, 440 TokenType.TABLE, 441 TokenType.TIMESTAMP, 442 TokenType.TIMESTAMPTZ, 443 TokenType.TRUNCATE, 444 TokenType.WINDOW, 445 TokenType.XOR, 446 *TYPE_TOKENS, 447 *SUBQUERY_PREDICATES, 448 } 449 450 CONJUNCTION = { 451 TokenType.AND: exp.And, 452 TokenType.OR: exp.Or, 453 } 454 455 EQUALITY = { 456 TokenType.COLON_EQ: exp.PropertyEQ, 457 TokenType.EQ: exp.EQ, 458 TokenType.NEQ: exp.NEQ, 459 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 460 } 461 462 COMPARISON = { 463 TokenType.GT: exp.GT, 464 TokenType.GTE: exp.GTE, 465 TokenType.LT: exp.LT, 466 TokenType.LTE: exp.LTE, 467 } 468 469 BITWISE = { 470 TokenType.AMP: exp.BitwiseAnd, 471 TokenType.CARET: exp.BitwiseXor, 472 TokenType.PIPE: exp.BitwiseOr, 473 } 474 475 TERM = { 476 TokenType.DASH: exp.Sub, 477 TokenType.PLUS: exp.Add, 478 TokenType.MOD: exp.Mod, 479 TokenType.COLLATE: exp.Collate, 480 } 481 482 FACTOR = { 483 TokenType.DIV: exp.IntDiv, 484 TokenType.LR_ARROW: exp.Distance, 485 TokenType.SLASH: exp.Div, 486 TokenType.STAR: exp.Mul, 487 } 488 489 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 490 491 TIMES = { 492 TokenType.TIME, 493 TokenType.TIMETZ, 494 } 495 496 TIMESTAMPS = { 497 TokenType.TIMESTAMP, 498 TokenType.TIMESTAMPTZ, 499 TokenType.TIMESTAMPLTZ, 500 *TIMES, 501 } 502 503 SET_OPERATIONS = { 504 TokenType.UNION, 505 TokenType.INTERSECT, 506 TokenType.EXCEPT, 507 } 508 509 JOIN_METHODS = { 510 TokenType.NATURAL, 511 TokenType.ASOF, 512 } 513 514 JOIN_SIDES = { 515 TokenType.LEFT, 516 TokenType.RIGHT, 517 TokenType.FULL, 518 } 519 520 JOIN_KINDS = { 521 TokenType.INNER, 522 TokenType.OUTER, 523 TokenType.CROSS, 524 TokenType.SEMI, 525 TokenType.ANTI, 526 } 
527 528 JOIN_HINTS: t.Set[str] = set() 529 530 LAMBDAS = { 531 TokenType.ARROW: lambda self, expressions: self.expression( 532 exp.Lambda, 533 this=self._replace_lambda( 534 self._parse_conjunction(), 535 {node.name for node in expressions}, 536 ), 537 expressions=expressions, 538 ), 539 TokenType.FARROW: lambda self, expressions: self.expression( 540 exp.Kwarg, 541 this=exp.var(expressions[0].name), 542 expression=self._parse_conjunction(), 543 ), 544 } 545 546 COLUMN_OPERATORS = { 547 TokenType.DOT: None, 548 TokenType.DCOLON: lambda self, this, to: self.expression( 549 exp.Cast if self.STRICT_CAST else exp.TryCast, 550 this=this, 551 to=to, 552 ), 553 TokenType.ARROW: lambda self, this, path: self.expression( 554 exp.JSONExtract, 555 this=this, 556 expression=self.dialect.to_json_path(path), 557 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 558 ), 559 TokenType.DARROW: lambda self, this, path: self.expression( 560 exp.JSONExtractScalar, 561 this=this, 562 expression=self.dialect.to_json_path(path), 563 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 564 ), 565 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 566 exp.JSONBExtract, 567 this=this, 568 expression=path, 569 ), 570 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 571 exp.JSONBExtractScalar, 572 this=this, 573 expression=path, 574 ), 575 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 576 exp.JSONBContains, 577 this=this, 578 expression=key, 579 ), 580 } 581 582 EXPRESSION_PARSERS = { 583 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 584 exp.Column: lambda self: self._parse_column(), 585 exp.Condition: lambda self: self._parse_conjunction(), 586 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 587 exp.Expression: lambda self: self._parse_expression(), 588 exp.From: lambda self: self._parse_from(), 589 exp.Group: lambda self: self._parse_group(), 590 exp.Having: lambda self: 
self._parse_having(), 591 exp.Identifier: lambda self: self._parse_id_var(), 592 exp.Join: lambda self: self._parse_join(), 593 exp.Lambda: lambda self: self._parse_lambda(), 594 exp.Lateral: lambda self: self._parse_lateral(), 595 exp.Limit: lambda self: self._parse_limit(), 596 exp.Offset: lambda self: self._parse_offset(), 597 exp.Order: lambda self: self._parse_order(), 598 exp.Ordered: lambda self: self._parse_ordered(), 599 exp.Properties: lambda self: self._parse_properties(), 600 exp.Qualify: lambda self: self._parse_qualify(), 601 exp.Returning: lambda self: self._parse_returning(), 602 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 603 exp.Table: lambda self: self._parse_table_parts(), 604 exp.TableAlias: lambda self: self._parse_table_alias(), 605 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 606 exp.Where: lambda self: self._parse_where(), 607 exp.Window: lambda self: self._parse_named_window(), 608 exp.With: lambda self: self._parse_with(), 609 "JOIN_TYPE": lambda self: self._parse_join_parts(), 610 } 611 612 STATEMENT_PARSERS = { 613 TokenType.ALTER: lambda self: self._parse_alter(), 614 TokenType.BEGIN: lambda self: self._parse_transaction(), 615 TokenType.CACHE: lambda self: self._parse_cache(), 616 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 617 TokenType.COMMENT: lambda self: self._parse_comment(), 618 TokenType.CREATE: lambda self: self._parse_create(), 619 TokenType.DELETE: lambda self: self._parse_delete(), 620 TokenType.DESC: lambda self: self._parse_describe(), 621 TokenType.DESCRIBE: lambda self: self._parse_describe(), 622 TokenType.DROP: lambda self: self._parse_drop(), 623 TokenType.INSERT: lambda self: self._parse_insert(), 624 TokenType.KILL: lambda self: self._parse_kill(), 625 TokenType.LOAD: lambda self: self._parse_load(), 626 TokenType.MERGE: lambda self: self._parse_merge(), 627 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 628 TokenType.PRAGMA: lambda 
self: self.expression(exp.Pragma, this=self._parse_expression()), 629 TokenType.REFRESH: lambda self: self._parse_refresh(), 630 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 631 TokenType.SET: lambda self: self._parse_set(), 632 TokenType.UNCACHE: lambda self: self._parse_uncache(), 633 TokenType.UPDATE: lambda self: self._parse_update(), 634 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 635 TokenType.USE: lambda self: self.expression( 636 exp.Use, 637 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 638 this=self._parse_table(schema=False), 639 ), 640 } 641 642 UNARY_PARSERS = { 643 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 644 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 645 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 646 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 647 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 648 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 649 } 650 651 STRING_PARSERS = { 652 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 653 exp.RawString, this=token.text 654 ), 655 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 656 exp.National, this=token.text 657 ), 658 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 659 TokenType.STRING: lambda self, token: self.expression( 660 exp.Literal, this=token.text, is_string=True 661 ), 662 TokenType.UNICODE_STRING: lambda self, token: self.expression( 663 exp.UnicodeString, 664 this=token.text, 665 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 666 ), 667 } 668 669 NUMERIC_PARSERS = { 670 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 671 TokenType.BYTE_STRING: lambda 
self, token: self.expression(exp.ByteString, this=token.text), 672 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 673 TokenType.NUMBER: lambda self, token: self.expression( 674 exp.Literal, this=token.text, is_string=False 675 ), 676 } 677 678 PRIMARY_PARSERS = { 679 **STRING_PARSERS, 680 **NUMERIC_PARSERS, 681 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 682 TokenType.NULL: lambda self, _: self.expression(exp.Null), 683 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 684 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 685 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 686 TokenType.STAR: lambda self, _: self.expression( 687 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 688 ), 689 } 690 691 PLACEHOLDER_PARSERS = { 692 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 693 TokenType.PARAMETER: lambda self: self._parse_parameter(), 694 TokenType.COLON: lambda self: ( 695 self.expression(exp.Placeholder, this=self._prev.text) 696 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 697 else None 698 ), 699 } 700 701 RANGE_PARSERS = { 702 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 703 TokenType.GLOB: binary_range_parser(exp.Glob), 704 TokenType.ILIKE: binary_range_parser(exp.ILike), 705 TokenType.IN: lambda self, this: self._parse_in(this), 706 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 707 TokenType.IS: lambda self, this: self._parse_is(this), 708 TokenType.LIKE: binary_range_parser(exp.Like), 709 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 710 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 711 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 712 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 713 } 714 715 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 716 
"ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 717 "AUTO": lambda self: self._parse_auto_property(), 718 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 719 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 720 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 721 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 722 "CHECKSUM": lambda self: self._parse_checksum(), 723 "CLUSTER BY": lambda self: self._parse_cluster(), 724 "CLUSTERED": lambda self: self._parse_clustered_by(), 725 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 726 exp.CollateProperty, **kwargs 727 ), 728 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 729 "CONTAINS": lambda self: self._parse_contains_property(), 730 "COPY": lambda self: self._parse_copy_property(), 731 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 732 "DEFINER": lambda self: self._parse_definer(), 733 "DETERMINISTIC": lambda self: self.expression( 734 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 735 ), 736 "DISTKEY": lambda self: self._parse_distkey(), 737 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 738 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 739 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 740 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 741 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 742 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 743 "FREESPACE": lambda self: self._parse_freespace(), 744 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 745 "HEAP": lambda self: self.expression(exp.HeapProperty), 746 "IMMUTABLE": lambda self: self.expression( 747 exp.StabilityProperty, 
this=exp.Literal.string("IMMUTABLE") 748 ), 749 "INHERITS": lambda self: self.expression( 750 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 751 ), 752 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 753 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 754 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 755 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 756 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 757 "LIKE": lambda self: self._parse_create_like(), 758 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 759 "LOCK": lambda self: self._parse_locking(), 760 "LOCKING": lambda self: self._parse_locking(), 761 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 762 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 763 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 764 "MODIFIES": lambda self: self._parse_modifies_property(), 765 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 766 "NO": lambda self: self._parse_no_property(), 767 "ON": lambda self: self._parse_on_property(), 768 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 769 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 770 "PARTITION": lambda self: self._parse_partitioned_of(), 771 "PARTITION BY": lambda self: self._parse_partitioned_by(), 772 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 773 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 774 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 775 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 776 "READS": lambda self: self._parse_reads_property(), 777 "REMOTE": lambda self: self._parse_remote_with_connection(), 778 "RETURNS": lambda self: self._parse_returns(), 779 "ROW": 
lambda self: self._parse_row(), 780 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 781 "SAMPLE": lambda self: self.expression( 782 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 783 ), 784 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 785 "SETTINGS": lambda self: self.expression( 786 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 787 ), 788 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 789 "SORTKEY": lambda self: self._parse_sortkey(), 790 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 791 "STABLE": lambda self: self.expression( 792 exp.StabilityProperty, this=exp.Literal.string("STABLE") 793 ), 794 "STORED": lambda self: self._parse_stored(), 795 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 796 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 797 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 798 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 799 "TO": lambda self: self._parse_to_table(), 800 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 801 "TRANSFORM": lambda self: self.expression( 802 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 803 ), 804 "TTL": lambda self: self._parse_ttl(), 805 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 806 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 807 "VOLATILE": lambda self: self._parse_volatile_property(), 808 "WITH": lambda self: self._parse_with_property(), 809 } 810 811 CONSTRAINT_PARSERS = { 812 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 813 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 814 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 815 "CHARACTER SET": lambda self: self.expression( 
816 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 817 ), 818 "CHECK": lambda self: self.expression( 819 exp.CheckColumnConstraint, 820 this=self._parse_wrapped(self._parse_conjunction), 821 enforced=self._match_text_seq("ENFORCED"), 822 ), 823 "COLLATE": lambda self: self.expression( 824 exp.CollateColumnConstraint, this=self._parse_var() 825 ), 826 "COMMENT": lambda self: self.expression( 827 exp.CommentColumnConstraint, this=self._parse_string() 828 ), 829 "COMPRESS": lambda self: self._parse_compress(), 830 "CLUSTERED": lambda self: self.expression( 831 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 832 ), 833 "NONCLUSTERED": lambda self: self.expression( 834 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 835 ), 836 "DEFAULT": lambda self: self.expression( 837 exp.DefaultColumnConstraint, this=self._parse_bitwise() 838 ), 839 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 840 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 841 "FORMAT": lambda self: self.expression( 842 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 843 ), 844 "GENERATED": lambda self: self._parse_generated_as_identity(), 845 "IDENTITY": lambda self: self._parse_auto_increment(), 846 "INLINE": lambda self: self._parse_inline(), 847 "LIKE": lambda self: self._parse_create_like(), 848 "NOT": lambda self: self._parse_not_constraint(), 849 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 850 "ON": lambda self: ( 851 self._match(TokenType.UPDATE) 852 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 853 ) 854 or self.expression(exp.OnProperty, this=self._parse_id_var()), 855 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 856 "PERIOD": lambda self: self._parse_period_for_system_time(), 857 "PRIMARY KEY": lambda self: 
self._parse_primary_key(), 858 "REFERENCES": lambda self: self._parse_references(match=False), 859 "TITLE": lambda self: self.expression( 860 exp.TitleColumnConstraint, this=self._parse_var_or_string() 861 ), 862 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 863 "UNIQUE": lambda self: self._parse_unique(), 864 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 865 "WITH": lambda self: self.expression( 866 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 867 ), 868 } 869 870 ALTER_PARSERS = { 871 "ADD": lambda self: self._parse_alter_table_add(), 872 "ALTER": lambda self: self._parse_alter_table_alter(), 873 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 874 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 875 "DROP": lambda self: self._parse_alter_table_drop(), 876 "RENAME": lambda self: self._parse_alter_table_rename(), 877 } 878 879 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"} 880 881 NO_PAREN_FUNCTION_PARSERS = { 882 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 883 "CASE": lambda self: self._parse_case(), 884 "IF": lambda self: self._parse_if(), 885 "NEXT": lambda self: self._parse_next_value_for(), 886 } 887 888 INVALID_FUNC_NAME_TOKENS = { 889 TokenType.IDENTIFIER, 890 TokenType.STRING, 891 } 892 893 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 894 895 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 896 897 FUNCTION_PARSERS = { 898 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 899 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 900 "DECODE": lambda self: self._parse_decode(), 901 "EXTRACT": lambda self: self._parse_extract(), 902 "JSON_OBJECT": lambda self: self._parse_json_object(), 903 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 904 "JSON_TABLE": lambda self: 
self._parse_json_table(), 905 "MATCH": lambda self: self._parse_match_against(), 906 "OPENJSON": lambda self: self._parse_open_json(), 907 "POSITION": lambda self: self._parse_position(), 908 "PREDICT": lambda self: self._parse_predict(), 909 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 910 "STRING_AGG": lambda self: self._parse_string_agg(), 911 "SUBSTRING": lambda self: self._parse_substring(), 912 "TRIM": lambda self: self._parse_trim(), 913 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 914 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 915 } 916 917 QUERY_MODIFIER_PARSERS = { 918 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 919 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 920 TokenType.WHERE: lambda self: ("where", self._parse_where()), 921 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 922 TokenType.HAVING: lambda self: ("having", self._parse_having()), 923 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 924 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 925 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 926 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 927 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 928 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 929 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 930 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 931 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 932 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 933 TokenType.CLUSTER_BY: lambda self: ( 934 "cluster", 935 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 936 ), 937 TokenType.DISTRIBUTE_BY: lambda self: ( 938 "distribute", 939 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 940 ), 941 
TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 942 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 943 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 944 } 945 946 SET_PARSERS = { 947 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 948 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 949 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 950 "TRANSACTION": lambda self: self._parse_set_transaction(), 951 } 952 953 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 954 955 TYPE_LITERAL_PARSERS = { 956 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 957 } 958 959 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 960 961 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 962 963 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 964 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 965 "ISOLATION": ( 966 ("LEVEL", "REPEATABLE", "READ"), 967 ("LEVEL", "READ", "COMMITTED"), 968 ("LEVEL", "READ", "UNCOMITTED"), 969 ("LEVEL", "SERIALIZABLE"), 970 ), 971 "READ": ("WRITE", "ONLY"), 972 } 973 974 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 975 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 976 ) 977 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 978 979 CREATE_SEQUENCE: OPTIONS_TYPE = { 980 "SCALE": ("EXTEND", "NOEXTEND"), 981 "SHARD": ("EXTEND", "NOEXTEND"), 982 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 983 **dict.fromkeys( 984 ( 985 "SESSION", 986 "GLOBAL", 987 "KEEP", 988 "NOKEEP", 989 "ORDER", 990 "NOORDER", 991 "NOCACHE", 992 "CYCLE", 993 "NOCYCLE", 994 "NOMINVALUE", 995 "NOMAXVALUE", 996 "NOSCALE", 997 "NOSHARD", 998 ), 999 tuple(), 1000 ), 1001 } 1002 1003 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1004 1005 INSERT_ALTERNATIVES = {"ABORT", 
"FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1006 1007 CLONE_KEYWORDS = {"CLONE", "COPY"} 1008 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1009 1010 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 1011 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1012 1013 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1014 1015 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1016 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1017 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1018 1019 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1020 1021 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1022 1023 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 1024 1025 DISTINCT_TOKENS = {TokenType.DISTINCT} 1026 1027 NULL_TOKENS = {TokenType.NULL} 1028 1029 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1030 1031 STRICT_CAST = True 1032 1033 PREFIXED_PIVOT_COLUMNS = False 1034 IDENTIFY_PIVOT_STRINGS = False 1035 1036 LOG_DEFAULTS_TO_LN = False 1037 1038 # Whether ADD is present for each column added by ALTER TABLE 1039 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1040 1041 # Whether the table sample clause expects CSV syntax 1042 TABLESAMPLE_CSV = False 1043 1044 # Whether the SET command needs a delimiter (e.g. 
"=") for assignments 1045 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1046 1047 # Whether the TRIM function expects the characters to trim as its first argument 1048 TRIM_PATTERN_FIRST = False 1049 1050 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1051 STRING_ALIASES = False 1052 1053 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1054 MODIFIERS_ATTACHED_TO_UNION = True 1055 UNION_MODIFIERS = {"order", "limit", "offset"} 1056 1057 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1058 NO_PAREN_IF_COMMANDS = True 1059 1060 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1061 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1062 1063 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1064 # If this is True and '(' is not found, the keyword will be treated as an identifier 1065 VALUES_FOLLOWED_BY_PAREN = True 1066 1067 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1068 SUPPORTS_IMPLICIT_UNNEST = False 1069 1070 __slots__ = ( 1071 "error_level", 1072 "error_message_context", 1073 "max_errors", 1074 "dialect", 1075 "sql", 1076 "errors", 1077 "_tokens", 1078 "_index", 1079 "_curr", 1080 "_next", 1081 "_prev", 1082 "_prev_comments", 1083 ) 1084 1085 # Autofilled 1086 SHOW_TRIE: t.Dict = {} 1087 SET_TRIE: t.Dict = {} 1088 1089 def __init__( 1090 self, 1091 error_level: t.Optional[ErrorLevel] = None, 1092 error_message_context: int = 100, 1093 max_errors: int = 3, 1094 dialect: DialectType = None, 1095 ): 1096 from sqlglot.dialects import Dialect 1097 1098 self.error_level = error_level or ErrorLevel.IMMEDIATE 1099 self.error_message_context = error_message_context 1100 self.max_errors = max_errors 1101 self.dialect = Dialect.get_or_raise(dialect) 1102 self.reset() 1103 1104 def reset(self): 1105 self.sql = "" 1106 self.errors = [] 1107 self._tokens = [] 1108 self._index = 0 1109 self._curr = None 1110 self._next = None 1111 self._prev = None 1112 self._prev_comments = None 1113 1114 def parse( 1115 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1116 ) -> t.List[t.Optional[exp.Expression]]: 1117 """ 1118 Parses a list of tokens and returns a list of syntax trees, one tree 1119 per parsed SQL statement. 1120 1121 Args: 1122 raw_tokens: The list of tokens. 1123 sql: The original SQL string, used to produce helpful debug messages. 1124 1125 Returns: 1126 The list of the produced syntax trees. 1127 """ 1128 return self._parse( 1129 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1130 ) 1131 1132 def parse_into( 1133 self, 1134 expression_types: exp.IntoType, 1135 raw_tokens: t.List[Token], 1136 sql: t.Optional[str] = None, 1137 ) -> t.List[t.Optional[exp.Expression]]: 1138 """ 1139 Parses a list of tokens into a given Expression type. 
If a collection of Expression 1140 types is given instead, this method will try to parse the token list into each one 1141 of them, stopping at the first for which the parsing succeeds. 1142 1143 Args: 1144 expression_types: The expression type(s) to try and parse the token list into. 1145 raw_tokens: The list of tokens. 1146 sql: The original SQL string, used to produce helpful debug messages. 1147 1148 Returns: 1149 The target Expression. 1150 """ 1151 errors = [] 1152 for expression_type in ensure_list(expression_types): 1153 parser = self.EXPRESSION_PARSERS.get(expression_type) 1154 if not parser: 1155 raise TypeError(f"No parser registered for {expression_type}") 1156 1157 try: 1158 return self._parse(parser, raw_tokens, sql) 1159 except ParseError as e: 1160 e.errors[0]["into_expression"] = expression_type 1161 errors.append(e) 1162 1163 raise ParseError( 1164 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1165 errors=merge_errors(errors), 1166 ) from errors[-1] 1167 1168 def _parse( 1169 self, 1170 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1171 raw_tokens: t.List[Token], 1172 sql: t.Optional[str] = None, 1173 ) -> t.List[t.Optional[exp.Expression]]: 1174 self.reset() 1175 self.sql = sql or "" 1176 1177 total = len(raw_tokens) 1178 chunks: t.List[t.List[Token]] = [[]] 1179 1180 for i, token in enumerate(raw_tokens): 1181 if token.token_type == TokenType.SEMICOLON: 1182 if i < total - 1: 1183 chunks.append([]) 1184 else: 1185 chunks[-1].append(token) 1186 1187 expressions = [] 1188 1189 for tokens in chunks: 1190 self._index = -1 1191 self._tokens = tokens 1192 self._advance() 1193 1194 expressions.append(parse_method(self)) 1195 1196 if self._index < len(self._tokens): 1197 self.raise_error("Invalid expression / Unexpected token") 1198 1199 self.check_errors() 1200 1201 return expressions 1202 1203 def check_errors(self) -> None: 1204 """Logs or raises any found errors, depending on the chosen error level setting.""" 
1205 if self.error_level == ErrorLevel.WARN: 1206 for error in self.errors: 1207 logger.error(str(error)) 1208 elif self.error_level == ErrorLevel.RAISE and self.errors: 1209 raise ParseError( 1210 concat_messages(self.errors, self.max_errors), 1211 errors=merge_errors(self.errors), 1212 ) 1213 1214 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1215 """ 1216 Appends an error in the list of recorded errors or raises it, depending on the chosen 1217 error level setting. 1218 """ 1219 token = token or self._curr or self._prev or Token.string("") 1220 start = token.start 1221 end = token.end + 1 1222 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1223 highlight = self.sql[start:end] 1224 end_context = self.sql[end : end + self.error_message_context] 1225 1226 error = ParseError.new( 1227 f"{message}. Line {token.line}, Col: {token.col}.\n" 1228 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1229 description=message, 1230 line=token.line, 1231 col=token.col, 1232 start_context=start_context, 1233 highlight=highlight, 1234 end_context=end_context, 1235 ) 1236 1237 if self.error_level == ErrorLevel.IMMEDIATE: 1238 raise error 1239 1240 self.errors.append(error) 1241 1242 def expression( 1243 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1244 ) -> E: 1245 """ 1246 Creates a new, validated Expression. 1247 1248 Args: 1249 exp_class: The expression class to instantiate. 1250 comments: An optional list of comments to attach to the expression. 1251 kwargs: The arguments to set for the expression along with their respective values. 1252 1253 Returns: 1254 The target expression. 
1255 """ 1256 instance = exp_class(**kwargs) 1257 instance.add_comments(comments) if comments else self._add_comments(instance) 1258 return self.validate_expression(instance) 1259 1260 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1261 if expression and self._prev_comments: 1262 expression.add_comments(self._prev_comments) 1263 self._prev_comments = None 1264 1265 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1266 """ 1267 Validates an Expression, making sure that all its mandatory arguments are set. 1268 1269 Args: 1270 expression: The expression to validate. 1271 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1272 1273 Returns: 1274 The validated expression. 1275 """ 1276 if self.error_level != ErrorLevel.IGNORE: 1277 for error_message in expression.error_messages(args): 1278 self.raise_error(error_message) 1279 1280 return expression 1281 1282 def _find_sql(self, start: Token, end: Token) -> str: 1283 return self.sql[start.start : end.end + 1] 1284 1285 def _is_connected(self) -> bool: 1286 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1287 1288 def _advance(self, times: int = 1) -> None: 1289 self._index += times 1290 self._curr = seq_get(self._tokens, self._index) 1291 self._next = seq_get(self._tokens, self._index + 1) 1292 1293 if self._index > 0: 1294 self._prev = self._tokens[self._index - 1] 1295 self._prev_comments = self._prev.comments 1296 else: 1297 self._prev = None 1298 self._prev_comments = None 1299 1300 def _retreat(self, index: int) -> None: 1301 if index != self._index: 1302 self._advance(index - self._index) 1303 1304 def _warn_unsupported(self) -> None: 1305 if len(self._tokens) <= 1: 1306 return 1307 1308 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1309 # interested in emitting a warning for the one being currently processed. 
1310 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1311 1312 logger.warning( 1313 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1314 ) 1315 1316 def _parse_command(self) -> exp.Command: 1317 self._warn_unsupported() 1318 return self.expression( 1319 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1320 ) 1321 1322 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1323 start = self._prev 1324 exists = self._parse_exists() if allow_exists else None 1325 1326 self._match(TokenType.ON) 1327 1328 kind = self._match_set(self.CREATABLES) and self._prev 1329 if not kind: 1330 return self._parse_as_command(start) 1331 1332 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1333 this = self._parse_user_defined_function(kind=kind.token_type) 1334 elif kind.token_type == TokenType.TABLE: 1335 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1336 elif kind.token_type == TokenType.COLUMN: 1337 this = self._parse_column() 1338 else: 1339 this = self._parse_id_var() 1340 1341 self._match(TokenType.IS) 1342 1343 return self.expression( 1344 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1345 ) 1346 1347 def _parse_to_table( 1348 self, 1349 ) -> exp.ToTableProperty: 1350 table = self._parse_table_parts(schema=True) 1351 return self.expression(exp.ToTableProperty, this=table) 1352 1353 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1354 def _parse_ttl(self) -> exp.Expression: 1355 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1356 this = self._parse_bitwise() 1357 1358 if self._match_text_seq("DELETE"): 1359 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1360 if self._match_text_seq("RECOMPRESS"): 1361 return self.expression( 1362 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1363 ) 
1364 if self._match_text_seq("TO", "DISK"): 1365 return self.expression( 1366 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1367 ) 1368 if self._match_text_seq("TO", "VOLUME"): 1369 return self.expression( 1370 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1371 ) 1372 1373 return this 1374 1375 expressions = self._parse_csv(_parse_ttl_action) 1376 where = self._parse_where() 1377 group = self._parse_group() 1378 1379 aggregates = None 1380 if group and self._match(TokenType.SET): 1381 aggregates = self._parse_csv(self._parse_set_item) 1382 1383 return self.expression( 1384 exp.MergeTreeTTL, 1385 expressions=expressions, 1386 where=where, 1387 group=group, 1388 aggregates=aggregates, 1389 ) 1390 1391 def _parse_statement(self) -> t.Optional[exp.Expression]: 1392 if self._curr is None: 1393 return None 1394 1395 if self._match_set(self.STATEMENT_PARSERS): 1396 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1397 1398 if self._match_set(Tokenizer.COMMANDS): 1399 return self._parse_command() 1400 1401 expression = self._parse_expression() 1402 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1403 return self._parse_query_modifiers(expression) 1404 1405 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1406 start = self._prev 1407 temporary = self._match(TokenType.TEMPORARY) 1408 materialized = self._match_text_seq("MATERIALIZED") 1409 1410 kind = self._match_set(self.CREATABLES) and self._prev.text 1411 if not kind: 1412 return self._parse_as_command(start) 1413 1414 return self.expression( 1415 exp.Drop, 1416 comments=start.comments, 1417 exists=exists or self._parse_exists(), 1418 this=self._parse_table( 1419 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1420 ), 1421 kind=kind, 1422 temporary=temporary, 1423 materialized=materialized, 1424 cascade=self._match_text_seq("CASCADE"), 1425 
constraints=self._match_text_seq("CONSTRAINTS"), 1426 purge=self._match_text_seq("PURGE"), 1427 ) 1428 1429 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1430 return ( 1431 self._match_text_seq("IF") 1432 and (not not_ or self._match(TokenType.NOT)) 1433 and self._match(TokenType.EXISTS) 1434 ) 1435 1436 def _parse_create(self) -> exp.Create | exp.Command: 1437 # Note: this can't be None because we've matched a statement parser 1438 start = self._prev 1439 comments = self._prev_comments 1440 1441 replace = ( 1442 start.token_type == TokenType.REPLACE 1443 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1444 or self._match_pair(TokenType.OR, TokenType.ALTER) 1445 ) 1446 1447 unique = self._match(TokenType.UNIQUE) 1448 1449 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1450 self._advance() 1451 1452 properties = None 1453 create_token = self._match_set(self.CREATABLES) and self._prev 1454 1455 if not create_token: 1456 # exp.Properties.Location.POST_CREATE 1457 properties = self._parse_properties() 1458 create_token = self._match_set(self.CREATABLES) and self._prev 1459 1460 if not properties or not create_token: 1461 return self._parse_as_command(start) 1462 1463 exists = self._parse_exists(not_=True) 1464 this = None 1465 expression: t.Optional[exp.Expression] = None 1466 indexes = None 1467 no_schema_binding = None 1468 begin = None 1469 end = None 1470 clone = None 1471 1472 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1473 nonlocal properties 1474 if properties and temp_props: 1475 properties.expressions.extend(temp_props.expressions) 1476 elif temp_props: 1477 properties = temp_props 1478 1479 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1480 this = self._parse_user_defined_function(kind=create_token.token_type) 1481 1482 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1483 extend_props(self._parse_properties()) 1484 1485 
expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1486 1487 if not expression: 1488 if self._match(TokenType.COMMAND): 1489 expression = self._parse_as_command(self._prev) 1490 else: 1491 begin = self._match(TokenType.BEGIN) 1492 return_ = self._match_text_seq("RETURN") 1493 1494 if self._match(TokenType.STRING, advance=False): 1495 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1496 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1497 expression = self._parse_string() 1498 extend_props(self._parse_properties()) 1499 else: 1500 expression = self._parse_statement() 1501 1502 end = self._match_text_seq("END") 1503 1504 if return_: 1505 expression = self.expression(exp.Return, this=expression) 1506 elif create_token.token_type == TokenType.INDEX: 1507 this = self._parse_index(index=self._parse_id_var()) 1508 elif create_token.token_type in self.DB_CREATABLES: 1509 table_parts = self._parse_table_parts( 1510 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1511 ) 1512 1513 # exp.Properties.Location.POST_NAME 1514 self._match(TokenType.COMMA) 1515 extend_props(self._parse_properties(before=True)) 1516 1517 this = self._parse_schema(this=table_parts) 1518 1519 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1520 extend_props(self._parse_properties()) 1521 1522 self._match(TokenType.ALIAS) 1523 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1524 # exp.Properties.Location.POST_ALIAS 1525 extend_props(self._parse_properties()) 1526 1527 if create_token.token_type == TokenType.SEQUENCE: 1528 expression = self._parse_types() 1529 extend_props(self._parse_properties()) 1530 else: 1531 expression = self._parse_ddl_select() 1532 1533 if create_token.token_type == TokenType.TABLE: 1534 # exp.Properties.Location.POST_EXPRESSION 1535 extend_props(self._parse_properties()) 1536 1537 indexes = [] 1538 while True: 
1539 index = self._parse_index() 1540 1541 # exp.Properties.Location.POST_INDEX 1542 extend_props(self._parse_properties()) 1543 1544 if not index: 1545 break 1546 else: 1547 self._match(TokenType.COMMA) 1548 indexes.append(index) 1549 elif create_token.token_type == TokenType.VIEW: 1550 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1551 no_schema_binding = True 1552 1553 shallow = self._match_text_seq("SHALLOW") 1554 1555 if self._match_texts(self.CLONE_KEYWORDS): 1556 copy = self._prev.text.lower() == "copy" 1557 clone = self.expression( 1558 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1559 ) 1560 1561 if self._curr: 1562 return self._parse_as_command(start) 1563 1564 return self.expression( 1565 exp.Create, 1566 comments=comments, 1567 this=this, 1568 kind=create_token.text.upper(), 1569 replace=replace, 1570 unique=unique, 1571 expression=expression, 1572 exists=exists, 1573 properties=properties, 1574 indexes=indexes, 1575 no_schema_binding=no_schema_binding, 1576 begin=begin, 1577 end=end, 1578 clone=clone, 1579 ) 1580 1581 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1582 seq = exp.SequenceProperties() 1583 1584 options = [] 1585 index = self._index 1586 1587 while self._curr: 1588 if self._match_text_seq("INCREMENT"): 1589 self._match_text_seq("BY") 1590 self._match_text_seq("=") 1591 seq.set("increment", self._parse_term()) 1592 elif self._match_text_seq("MINVALUE"): 1593 seq.set("minvalue", self._parse_term()) 1594 elif self._match_text_seq("MAXVALUE"): 1595 seq.set("maxvalue", self._parse_term()) 1596 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1597 self._match_text_seq("=") 1598 seq.set("start", self._parse_term()) 1599 elif self._match_text_seq("CACHE"): 1600 # T-SQL allows empty CACHE which is initialized dynamically 1601 seq.set("cache", self._parse_number() or True) 1602 elif self._match_text_seq("OWNED", "BY"): 1603 # "OWNED BY NONE" is the 
default 1604 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1605 else: 1606 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1607 if opt: 1608 options.append(opt) 1609 else: 1610 break 1611 1612 seq.set("options", options if options else None) 1613 return None if self._index == index else seq 1614 1615 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1616 # only used for teradata currently 1617 self._match(TokenType.COMMA) 1618 1619 kwargs = { 1620 "no": self._match_text_seq("NO"), 1621 "dual": self._match_text_seq("DUAL"), 1622 "before": self._match_text_seq("BEFORE"), 1623 "default": self._match_text_seq("DEFAULT"), 1624 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1625 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1626 "after": self._match_text_seq("AFTER"), 1627 "minimum": self._match_texts(("MIN", "MINIMUM")), 1628 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1629 } 1630 1631 if self._match_texts(self.PROPERTY_PARSERS): 1632 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1633 try: 1634 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1635 except TypeError: 1636 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1637 1638 return None 1639 1640 def _parse_property(self) -> t.Optional[exp.Expression]: 1641 if self._match_texts(self.PROPERTY_PARSERS): 1642 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1643 1644 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1645 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1646 1647 if self._match_text_seq("COMPOUND", "SORTKEY"): 1648 return self._parse_sortkey(compound=True) 1649 1650 if self._match_text_seq("SQL", "SECURITY"): 1651 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1652 1653 index = self._index 1654 key = self._parse_column() 1655 1656 if not 
self._match(TokenType.EQ): 1657 self._retreat(index) 1658 return self._parse_sequence_properties() 1659 1660 return self.expression( 1661 exp.Property, 1662 this=key.to_dot() if isinstance(key, exp.Column) else key, 1663 value=self._parse_column() or self._parse_var(any_token=True), 1664 ) 1665 1666 def _parse_stored(self) -> exp.FileFormatProperty: 1667 self._match(TokenType.ALIAS) 1668 1669 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1670 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1671 1672 return self.expression( 1673 exp.FileFormatProperty, 1674 this=( 1675 self.expression( 1676 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1677 ) 1678 if input_format or output_format 1679 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1680 ), 1681 ) 1682 1683 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1684 self._match(TokenType.EQ) 1685 self._match(TokenType.ALIAS) 1686 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1687 1688 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1689 properties = [] 1690 while True: 1691 if before: 1692 prop = self._parse_property_before() 1693 else: 1694 prop = self._parse_property() 1695 if not prop: 1696 break 1697 for p in ensure_list(prop): 1698 properties.append(p) 1699 1700 if properties: 1701 return self.expression(exp.Properties, expressions=properties) 1702 1703 return None 1704 1705 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1706 return self.expression( 1707 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1708 ) 1709 1710 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1711 if self._index >= 2: 1712 pre_volatile_token = self._tokens[self._index - 2] 1713 else: 1714 pre_volatile_token = None 1715 
1716 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1717 return exp.VolatileProperty() 1718 1719 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1720 1721 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1722 self._match_pair(TokenType.EQ, TokenType.ON) 1723 1724 prop = self.expression(exp.WithSystemVersioningProperty) 1725 if self._match(TokenType.L_PAREN): 1726 self._match_text_seq("HISTORY_TABLE", "=") 1727 prop.set("this", self._parse_table_parts()) 1728 1729 if self._match(TokenType.COMMA): 1730 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1731 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1732 1733 self._match_r_paren() 1734 1735 return prop 1736 1737 def _parse_with_property( 1738 self, 1739 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1740 if self._match(TokenType.L_PAREN, advance=False): 1741 return self._parse_wrapped_csv(self._parse_property) 1742 1743 if self._match_text_seq("JOURNAL"): 1744 return self._parse_withjournaltable() 1745 1746 if self._match_text_seq("DATA"): 1747 return self._parse_withdata(no=False) 1748 elif self._match_text_seq("NO", "DATA"): 1749 return self._parse_withdata(no=True) 1750 1751 if not self._next: 1752 return None 1753 1754 return self._parse_withisolatedloading() 1755 1756 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1757 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1758 self._match(TokenType.EQ) 1759 1760 user = self._parse_id_var() 1761 self._match(TokenType.PARAMETER) 1762 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1763 1764 if not user or not host: 1765 return None 1766 1767 return exp.DefinerProperty(this=f"{user}@{host}") 1768 1769 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1770 self._match(TokenType.TABLE) 1771 self._match(TokenType.EQ) 1772 return 
self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1773 1774 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1775 return self.expression(exp.LogProperty, no=no) 1776 1777 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1778 return self.expression(exp.JournalProperty, **kwargs) 1779 1780 def _parse_checksum(self) -> exp.ChecksumProperty: 1781 self._match(TokenType.EQ) 1782 1783 on = None 1784 if self._match(TokenType.ON): 1785 on = True 1786 elif self._match_text_seq("OFF"): 1787 on = False 1788 1789 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1790 1791 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 1792 return self.expression( 1793 exp.Cluster, 1794 expressions=( 1795 self._parse_wrapped_csv(self._parse_ordered) 1796 if wrapped 1797 else self._parse_csv(self._parse_ordered) 1798 ), 1799 ) 1800 1801 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1802 self._match_text_seq("BY") 1803 1804 self._match_l_paren() 1805 expressions = self._parse_csv(self._parse_column) 1806 self._match_r_paren() 1807 1808 if self._match_text_seq("SORTED", "BY"): 1809 self._match_l_paren() 1810 sorted_by = self._parse_csv(self._parse_ordered) 1811 self._match_r_paren() 1812 else: 1813 sorted_by = None 1814 1815 self._match(TokenType.INTO) 1816 buckets = self._parse_number() 1817 self._match_text_seq("BUCKETS") 1818 1819 return self.expression( 1820 exp.ClusteredByProperty, 1821 expressions=expressions, 1822 sorted_by=sorted_by, 1823 buckets=buckets, 1824 ) 1825 1826 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1827 if not self._match_text_seq("GRANTS"): 1828 self._retreat(self._index - 1) 1829 return None 1830 1831 return self.expression(exp.CopyGrantsProperty) 1832 1833 def _parse_freespace(self) -> exp.FreespaceProperty: 1834 self._match(TokenType.EQ) 1835 return self.expression( 1836 exp.FreespaceProperty, this=self._parse_number(), 
            percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parse MERGEBLOCKRATIO [= n [PERCENT]]; `no`/`default` flags come from the caller."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parse DATABLOCKSIZE [= n [BYTES | KBYTES | KILOBYTES]]."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse BLOCKCOMPRESSION = {ALWAYS | MANUAL | NEVER | DEFAULT} [AUTOTEMP (...)]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        """Parse [NO | CONCURRENT] ISOLATED LOADING [FOR {ALL | INSERT | NONE}]."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING {TABLE | VIEW | ROW | DATABASE} ... [FOR | IN] <type> [OVERRIDE] clause."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects carry a name after the kind keyword; ROW does not.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parse PARTITION BY <expr>, ... into a list; empty list if absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a partition bound: IN (...), FROM (...) TO (...), or WITH (MODULUS n, REMAINDER m)."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE/MAXVALUE are keywords only inside FROM/TO bound lists.
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parse PARTITION OF <table> {DEFAULT | FOR VALUES <bound-spec>}; None if OF is absent."""
        if not self._match_text_seq("OF"):
            # Undo the PARTITION token consumed by the caller so other parsers can retry.
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse PARTITIONED BY [=] <schema or bracketed field>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse the [AND [NO] STATISTICS] suffix of WITH [NO] DATA; `no` comes from the caller."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse the SQL keyword after CONTAINS, producing a CONTAINS SQL property."""
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse SQL DATA after MODIFIES, producing a MODIFIES SQL DATA property."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        """Parse what follows NO: PRIMARY INDEX or SQL; None if neither matches."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse what follows ON: COMMIT {PRESERVE | DELETE} ROWS, else a generic ON <schema>."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse SQL DATA after READS, producing a READS SQL DATA property."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse DISTKEY (<identifier>)."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse LIKE <table> [{INCLUDING | EXCLUDING} <option>]...; None on a malformed option."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse SORTKEY (<ids>); `compound` is set by the caller for COMPOUND SORTKEY."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse CHARACTER SET [=] <name>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse REMOTE WITH CONNECTION <table parts>."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: TABLE<...>, TABLE (<schema>), or a plain type."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # Angle-bracketed struct form: TABLE<col type, ...>
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        """Parse DESCRIBE [<creatable kind>] [EXTENDED] <table> [<properties>]."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        extended = self._match_text_seq("EXTENDED")
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions
        )

    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement (the INSERT keyword was already consumed)."""
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # Hive-style INSERT [OVERWRITE] [LOCAL] DIRECTORY target.
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. INSERT OR REPLACE / OR IGNORE — the alternative keyword is recorded as text.
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            # RETURNING may appear before or after the source expression.
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse KILL [CONNECTION | QUERY] <id>."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT ... or ON DUPLICATE KEY ...; None if neither follows."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            # DO UPDATE SET a = ..., b = ...
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse RETURNING <exprs> [INTO <target>]; None if RETURNING is absent."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse the FORMAT part of a ROW FORMAT clause (ROW was already consumed)."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ROW FORMAT {SERDE '<name>' [WITH SERDEPROPERTIES (...)] | DELIMITED ...}."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each sub-clause of DELIMITED is optional and order-sensitive.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse LOAD DATA ... INTO TABLE ...; anything else falls back to a raw Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement (the DELETE keyword was already consumed)."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            # RETURNING may appear before or after WHERE.
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement (the UPDATE keyword was already consumed)."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table>."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS ('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse PARTITION (<exprs>); None if PARTITION is absent."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse one VALUES row: a parenthesized tuple, or a bare single expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        return self.expression(exp.Tuple, expressions=[self._parse_expression()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list."""
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: optional CTEs, the SELECT core, or a parenthesized/VALUES form."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            # A leading FROM (duckdb) takes precedence; otherwise parse it here.
            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse WITH [RECURSIVE] <cte>, ...; None if WITH is absent (unless skipped)."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs are normally comma-separated; a stray repeated WITH is tolerated.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse one CTE: <alias> [(<cols>)] AS (<statement>)."""
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse [AS] <alias> [(<column aliases>)]; None if neither alias nor columns are present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # An empty parenthesis is not a column list — rewind to before the paren.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in a Subquery, attaching trailing pivots and an optional alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite comma-joined references to earlier sources as explicit UNNEST calls."""
        from sqlglot.optimizer.normalize_identifiers import (
            normalize_identifiers as _norm,
        )

        # Names of sources seen so far, normalized so lookups are dialect-consistent.
        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing joins/laterals and registered query modifiers (WHERE, GROUP BY, ...) to `this`."""
        if isinstance(this, (exp.Query, exp.Table)):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # LIMIT may carry an inline offset; hoist it into its own node.
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                # Move LIMIT BY expressions onto the Offset node.
                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args:
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint comment /*+ ... */; None if absent."""
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        # Parse INTO [TEMPORARY | UNLOGGED] [TABLE] <table>; None if INTO is absent.
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse FROM <table> [joins]; None if FROM is absent (unless skipped)."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE (...) clause; None if the keyword is absent."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is captured as raw SQL by scanning to the balancing paren.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL [VIEW] [OUTER] ... or CROSS/OUTER APPLY ...; None if neither applies."""
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            # cross_apply is a tri-state: True = CROSS APPLY, False = OUTER APPLY, None = LATERAL.
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: fall back to an unnest, function call, or identifier,
            # possibly dotted (e.g. schema.func(...)).
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Consume and return the optional (method, side, kind) tokens of a JOIN."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one JOIN clause (including comma joins and CROSS/OUTER APPLY); None if absent."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # The modifiers weren't followed by JOIN — rewind and forget them.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # No ON/USING here: the condition may follow a nested join, e.g. a JOIN b JOIN c ON ...
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an expression optionally followed by an operator class (e.g. Postgres index opclass)."""
        this = self._parse_conjunction()
        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition; if `index` is given, parse its ON <table> target instead."""
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            include=include,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse table hints: T-SQL WITH (...) or MySQL index hints; None if none found."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name (function, identifier, string, or placeholder)."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a possibly qualified table name: [catalog.][db.]table [*] [pivots]."""
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)
while self._match(TokenType.DOT): 2940 if catalog: 2941 # This allows nesting the table in arbitrarily many dot expressions if needed 2942 table = self.expression( 2943 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2944 ) 2945 else: 2946 catalog = db 2947 db = table 2948 # "" used for tsql FROM a..b case 2949 table = self._parse_table_part(schema=schema) or "" 2950 2951 if ( 2952 wildcard 2953 and self._is_connected() 2954 and (isinstance(table, exp.Identifier) or not table) 2955 and self._match(TokenType.STAR) 2956 ): 2957 if isinstance(table, exp.Identifier): 2958 table.args["this"] += "*" 2959 else: 2960 table = exp.Identifier(this="*") 2961 2962 if is_db_reference: 2963 catalog = db 2964 db = table 2965 table = None 2966 2967 if not table and not is_db_reference: 2968 self.raise_error(f"Expected table name but got {self._curr}") 2969 if not db and is_db_reference: 2970 self.raise_error(f"Expected database name but got {self._curr}") 2971 2972 return self.expression( 2973 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2974 ) 2975 2976 def _parse_table( 2977 self, 2978 schema: bool = False, 2979 joins: bool = False, 2980 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2981 parse_bracket: bool = False, 2982 is_db_reference: bool = False, 2983 ) -> t.Optional[exp.Expression]: 2984 lateral = self._parse_lateral() 2985 if lateral: 2986 return lateral 2987 2988 unnest = self._parse_unnest() 2989 if unnest: 2990 return unnest 2991 2992 values = self._parse_derived_table_values() 2993 if values: 2994 return values 2995 2996 subquery = self._parse_select(table=True) 2997 if subquery: 2998 if not subquery.args.get("pivots"): 2999 subquery.set("pivots", self._parse_pivots()) 3000 return subquery 3001 3002 bracket = parse_bracket and self._parse_bracket(None) 3003 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3004 3005 only = self._match(TokenType.ONLY) 3006 3007 this = t.cast( 3008 
exp.Expression,
            bracket
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Some dialects put the alias AFTER the sample clause, others before;
        # table_sample is parsed on whichever side the dialect requires.
        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample wraps the table node it applies to.
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse a temporal-table clause (FOR SYSTEM_TIME/SYSTEM_VERSION style):
        FROM/BETWEEN ... TO/AND ..., CONTAINED IN (...), ALL, or AS OF <expr>."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse UNNEST(...) including WITH ORDINALITY / WITH OFFSET and its alias."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                # Reinterpret the table alias as a single column alias.
                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                # The trailing column alias names the ordinality/offset column.
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        """Parse a VALUES clause, optionally parenthesized as a derived table."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match_text_seq("VALUES"):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
self._match_r_paren() 3133 3134 return self.expression( 3135 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3136 ) 3137 3138 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3139 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3140 as_modifier and self._match_text_seq("USING", "SAMPLE") 3141 ): 3142 return None 3143 3144 bucket_numerator = None 3145 bucket_denominator = None 3146 bucket_field = None 3147 percent = None 3148 size = None 3149 seed = None 3150 3151 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3152 matched_l_paren = self._match(TokenType.L_PAREN) 3153 3154 if self.TABLESAMPLE_CSV: 3155 num = None 3156 expressions = self._parse_csv(self._parse_primary) 3157 else: 3158 expressions = None 3159 num = ( 3160 self._parse_factor() 3161 if self._match(TokenType.NUMBER, advance=False) 3162 else self._parse_primary() or self._parse_placeholder() 3163 ) 3164 3165 if self._match_text_seq("BUCKET"): 3166 bucket_numerator = self._parse_number() 3167 self._match_text_seq("OUT", "OF") 3168 bucket_denominator = bucket_denominator = self._parse_number() 3169 self._match(TokenType.ON) 3170 bucket_field = self._parse_field() 3171 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3172 percent = num 3173 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3174 size = num 3175 else: 3176 percent = num 3177 3178 if matched_l_paren: 3179 self._match_r_paren() 3180 3181 if self._match(TokenType.L_PAREN): 3182 method = self._parse_var(upper=True) 3183 seed = self._match(TokenType.COMMA) and self._parse_number() 3184 self._match_r_paren() 3185 elif self._match_texts(("SEED", "REPEATABLE")): 3186 seed = self._parse_wrapped(self._parse_number) 3187 3188 return self.expression( 3189 exp.TableSample, 3190 expressions=expressions, 3191 method=method, 3192 bucket_numerator=bucket_numerator, 3193 bucket_denominator=bucket_denominator, 3194 
bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        # Collect consecutive PIVOT/UNPIVOT clauses until one fails to parse.
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        # Collect consecutive JOIN clauses until one fails to parse.
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        """Parse DuckDB's simplified PIVOT statement: PIVOT <table> ON ... USING ... [GROUP BY ...]."""

        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In:
        """Parse the `<column> IN (<value> [AS alias], ...)` part of a PIVOT's FOR clause."""

        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_conjunction()

            self._match(TokenType.ALIAS)
            alias = self._parse_field()
            if alias:
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        aliased_expressions = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=aliased_expressions)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a PIVOT/UNPIVOT clause; restores the cursor and returns None if absent."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            # PIVOT/UNPIVOT keyword without parens - not actually a pivot clause.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the names of the columns the PIVOT will produce
            # (combinations of IN-value names and aggregation aliases).
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        # Default: output columns are named after the aggregation aliases.
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        """Parse a ClickHouse-style PREWHERE clause."""
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY, including ALL, GROUPING SETS, ROLLUP, CUBE and WITH TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # WITH ROLLUP (bare) vs ROLLUP (col, ...)
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    # A lone WITH belonged to something else - give it back.
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        # A grouping set is either a parenthesized column tuple or a single column.
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse Oracle's hierarchical START WITH ... CONNECT BY (in either order)."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        # PRIOR is only a valid prefix operator inside CONNECT BY; register it
        # temporarily and remove it again once the condition has been parsed.
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_name_as_expression(self) -> exp.Alias:
        # Parses `<name> AS <expr>` (note: name first), e.g. ClickHouse INTERPOLATE items.
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        # ClickHouse ORDER BY ... WITH FILL INTERPOLATE (...)
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY (or Oracle's ORDER SIBLINGS BY); returns `this` unchanged if absent."""
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Generic parser for SORT BY / CLUSTER BY / DISTRIBUTE BY style clauses."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse one ordering term: expr [ASC|DESC] [NULLS FIRST|LAST] [WITH FILL ...]."""
        this = parse_method() if parse_method else self._parse_conjunction()
        if not this:
            return None

        asc = self._match(TokenType.ASC)
        # desc is False for an explicit ASC, truthy for DESC, None when unspecified.
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When the null ordering wasn't spelled out, infer it from the dialect's
        # default null-sorting semantics.
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            # ClickHouse ORDER BY ... WITH FILL [FROM ...] [TO ...] [STEP ...]
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT [offset,] n (or TOP (n) when top=True), or FETCH FIRST/NEXT ..."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL LIMIT <offset>, <count>
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET n [ROW|ROWS]; returns `this` unchanged if absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        # ClickHouse LIMIT ... BY expr, ...
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing locking clauses: FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE
        with optional OF <tables> and NOWAIT / WAIT n / SKIP LOCKED."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold trailing UNION/EXCEPT/INTERSECT operands onto `this`, left-associatively."""
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            expression = this.expression

            if expression:
                # Hoist trailing modifiers (e.g. ORDER BY, LIMIT) from the last
                # operand up onto the union node itself.
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        # Top of the expression precedence chain: conjunction, optionally aliased.
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, IS, ISNULL/NOTNULL, ...)."""
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
# https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of IS [NOT] NULL/TRUE/FALSE or IS [NOT] DISTINCT FROM."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # Not actually an IS predicate - back up to before the IS token.
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the tail of IN: UNNEST(...), a (sub)query or value list, or a bare field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        """Parse the tail of BETWEEN: <low> AND <high>."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an ESCAPE '<char>' clause when present."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL expression, normalizing it to the INTERVAL '<n>' <unit> form."""
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            # `interval` was just an identifier (e.g. a column named interval) - back up.
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1].upper())

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise-level binary operators, plus ||, ??, and << / >> shifts."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative operators, annotating Div nodes with dialect semantics."""
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            this = self.expression(
                self.FACTOR[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )
            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        """Parse an interval, a cast-style typed literal, or fall through to a column."""
        interval = parse_interval and self._parse_interval()
        if interval:
            # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
            while True:
                index = self._index
                self._match(TokenType.PLUS)

                if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
                    self._retreat(index)
                    break

                interval = self.expression(  # type: ignore
                    exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
                )

            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01' - a type keyword followed by a literal.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name with no args - treat it as a column reference instead.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        # A type parameter such as the 10 in DECIMAL(10, 2), with optional trailing var.
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type (possibly nested/parameterized) into exp.DataType.

        check_func: when True, back off if the "type" is likely a function call.
        schema: parsing inside a schema/column-def context.
        allow_identifiers: allow plain identifiers to be re-tokenized as types
            (also enables user-defined types when the dialect supports them).
        """
        index = self._index

        # Teradata's SYSUDTLIB.<type> prefix.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if identifier:
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    # The identifier re-tokenizes to a type keyword - adopt it.
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                # e.g. ClickHouse AggregateFunction(func, type, ...)
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # A parenthesized form might still turn out to be a function call.
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax, e.g. ARRAY<INT>, STRUCT<a INT, b STRING>
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                # e.g. INTERVAL YEAR TO MONTH
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # Not followed by a string literal, so it was a function call - bail.
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] suffixes wrap the type in ARRAY, one level per pair.
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one STRUCT field: `name[:] type` (or just a type when type_required)."""
        index = self._index
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        column_def = self._parse_column_def(this)

        if type_required and (
            (isinstance(this, exp.Column) and this.this is column_def) or this is column_def
        ):
            # We only got a bare name where a type was mandatory - reparse as a type.
            self._retreat(index)
            return self._parse_types()

        return column_def

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_column_reference()
        return self._parse_column_ops(this) if this else self._parse_bracket(this)

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            # VALUES not followed by ( can be a plain identifier in some dialects.
            this = self._parse_id_var()

        # A bare identifier at this point is a column reference.
        return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing column operators on `this`: brackets, `::` casts, dots, etc."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # Postgres-style cast: expr::TYPE
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # a.b.c: shift the qualifiers one level (column -> table -> db -> catalog).
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, `.N` number, or a parenthesized
        expression / subquery / tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals are implicitly concatenated.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot decimal, e.g. .25 -> 0.25
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.UNWRAPPED_QUERIES):
                # A query in parens becomes a subquery; set ops may follow it.
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: first try a primary, then a function call, then an identifier."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        # Functions with dedicated parsers that don't require parentheses.
        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...), ANY(WITH ...): wrap the query in the predicate.
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                # Normalize aliased args (k AS v / k = v) into PropertyEQ nodes.
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                # Some builder functions accept the dialect; detect via their signature.
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Preserve the original spelling of the function name.
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Rewrite key-value style arguments (aliases, equalities) into PropertyEQ nodes."""
        transformed = []

        for e in expressions:
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    # Keys are identifiers, not columns: unwrap the column node.
                    e.this.replace(e.this.this)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter of a user-defined function (name plus optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly-dotted UDF name, and its parameter list when parenthesized."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'abc'); fall back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda expression (e.g. x -> x + 1, or (a, b) -> ...); if no lambda
        arrow follows, rewind and parse a DISTINCT list or a regular expression."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda: rewind and parse the content as a normal argument expression.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/constraint definitions) attached to `this`.

        First speculatively tries a nested SELECT (e.g. CREATE TABLE t AS (SELECT ...));
        any errors from that attempt are discarded and the cursor is rewound.
        """
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse a single schema field definition."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            # Computed column without an explicit type, e.g. col AS (expr).
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and
self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            # Typed transform column, e.g. col TYPE AS (expr).
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        # Collect any remaining column constraints (NOT NULL, DEFAULT, ...).
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        """Parse AUTO REFRESH <value>; rewind if REFRESH doesn't follow."""
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS constraint, with either a wrapped list or a single value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY(...) | ROW ... | (expr)}."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            # GENERATED ... AS ROW {START | END} [HIDDEN]
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (expr): the parens hold a computed expression.
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare numbers inside IDENTITY(...) are (start, increment).
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse INLINE [LENGTH] <value>."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the remainder of a NOT ... constraint (NULL, CASESPECIFIC, FOR REPLICATION)."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via CONSTRAINT <name>."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table constraint; unnamed ones fall through to the schema set."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        """Collect consecutive unnamed constraints (or constraint-like function calls)."""
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a single unnamed constraint, dispatching on the constraint keyword."""
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] [(columns)] [USING <index type>] [ON CONFLICT ...]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Parse trailing key constraint options (ON <event> <action>, DEFERRABLE, ...)
        into a list of normalized uppercase strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON is the event name (e.g. DELETE, UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES <table> [options] clause; `match` requires the keyword."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON DELETE/UPDATE <action>]..."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single token is taken verbatim as the action.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        """Parse one element of a PRIMARY KEY column list."""
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        """Parse PERIOD FOR SYSTEM_TIME (start, end); rewind if the keyword is absent."""
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY either as a column constraint or as a table-level key list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            # No column list follows: this is a column-level constraint.
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        """Parse a single element inside brackets/braces (value, alias, or slice)."""
        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a trailing [ ... ] subscript or { ... } struct literal on `this`."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript access: normalize indexes by the dialect's index offset.
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        # Chain further subscripts, e.g. x[0][1].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional `:<expr>` slice suffix on `this`."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not
self._match(TokenType.END):
            # "END" may have been consumed as an INTERVAL unit; recover it as a column.
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF either as a function call IF(...) or as IF ... THEN ... [ELSE ...] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # A statement-leading IF is treated as an opaque command.
                return self._parse_as_command(self._prev)

            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]; rewind on mismatch."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(<part> FROM <expr>), also accepting a comma separator."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the body of CAST(expr AS type [FORMAT fmt]); `strict` picks Cast vs TryCast."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(expr, 'type string') variant.
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                # A temporal cast with FORMAT becomes a string-to-date/time conversion,
                # with the format string translated into the dialect's time mapping.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # An identifier in type position is a user-defined type.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT arguments into a GroupConcat expression."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type) into a cast."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search,
result] ... [, default]) 4877 4878 The second variant will always be parsed into a CASE expression. Note that NULL 4879 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4880 instead of relying on pattern matching. 4881 """ 4882 args = self._parse_csv(self._parse_conjunction) 4883 4884 if len(args) < 3: 4885 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4886 4887 expression, *expressions = args 4888 if not expression: 4889 return None 4890 4891 ifs = [] 4892 for search, result in zip(expressions[::2], expressions[1::2]): 4893 if not search or not result: 4894 return None 4895 4896 if isinstance(search, exp.Literal): 4897 ifs.append( 4898 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4899 ) 4900 elif isinstance(search, exp.Null): 4901 ifs.append( 4902 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4903 ) 4904 else: 4905 cond = exp.or_( 4906 exp.EQ(this=expression.copy(), expression=search), 4907 exp.and_( 4908 exp.Is(this=expression.copy(), expression=exp.Null()), 4909 exp.Is(this=search.copy(), expression=exp.Null()), 4910 copy=False, 4911 ), 4912 copy=False, 4913 ) 4914 ifs.append(exp.If(this=cond, true=result)) 4915 4916 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4917 4918 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4919 self._match_text_seq("KEY") 4920 key = self._parse_column() 4921 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 4922 self._match_text_seq("VALUE") 4923 value = self._parse_bitwise() 4924 4925 if not key and not value: 4926 return None 4927 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4928 4929 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4930 if not this or not self._match_text_seq("FORMAT", "JSON"): 4931 return this 4932 4933 return self.expression(exp.FormatJson, 
this=this) 4934 4935 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4936 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 4937 for value in values: 4938 if self._match_text_seq(value, "ON", on): 4939 return f"{value} ON {on}" 4940 4941 return None 4942 4943 @t.overload 4944 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 4945 4946 @t.overload 4947 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 4948 4949 def _parse_json_object(self, agg=False): 4950 star = self._parse_star() 4951 expressions = ( 4952 [star] 4953 if star 4954 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4955 ) 4956 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4957 4958 unique_keys = None 4959 if self._match_text_seq("WITH", "UNIQUE"): 4960 unique_keys = True 4961 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4962 unique_keys = False 4963 4964 self._match_text_seq("KEYS") 4965 4966 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4967 self._parse_type() 4968 ) 4969 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4970 4971 return self.expression( 4972 exp.JSONObjectAgg if agg else exp.JSONObject, 4973 expressions=expressions, 4974 null_handling=null_handling, 4975 unique_keys=unique_keys, 4976 return_type=return_type, 4977 encoding=encoding, 4978 ) 4979 4980 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4981 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4982 if not self._match_text_seq("NESTED"): 4983 this = self._parse_id_var() 4984 kind = self._parse_types(allow_identifiers=False) 4985 nested = None 4986 else: 4987 this = None 4988 kind = None 4989 nested = True 4990 4991 path = self._match_text_seq("PATH") and self._parse_string() 4992 nested_schema = nested and self._parse_json_schema() 4993 4994 return self.expression( 4995 exp.JSONColumnDef, 4996 this=this, 
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        # COLUMNS (<column def> [, ...]); the parentheses may be omitted (optional=True).
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse the argument list of a JSON_TABLE(...) call."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MATCH (<columns>) AGAINST (<string> [modifier])."""
        expressions = self._parse_csv(self._parse_column)

        # The ")" and "(" are matched as text because the caller owns the outer parens.
        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One entry of the WITH (<name> <type> [<path>] [AS JSON]) clause.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE-style argument lists into exp.StrPosition."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # ANSI form: POSITION(needle IN haystack).
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        # Comma form: the dialect decides whether the haystack comes first.
        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        # PREDICT(MODEL <model>, TABLE <table> [, <params struct>])
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        # e.g. BROADCAST(t1, t2); the hint name is normalized to upper case.
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            # One of the dialect's trim types, e.g. LEADING/TRAILING/BOTH.
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # With FROM (or when the dialect puts the pattern first), the operand
            # parsed so far is the trim characters and the next one is the target.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        # WINDOW <name> AS (<spec>) [, ...]
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        # Wraps `this` when it is followed by IGNORE NULLS / RESPECT NULLS.
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # HAVING MAX/MIN qualifier on an aggregate argument; max=True unless MIN.
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this,
                expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the clauses that may trail a function call — FILTER, WITHIN GROUP,
        IGNORE/RESPECT NULLS and the OVER (...) window specification.

        When `alias` is True this parses a named window (WINDOW x AS (...)) instead.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Hoist the IGNORE/RESPECT NULLS wrapper from inside the aggregate's
                # argument up around the aggregate itself.
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> — a reference to a named window.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame clause: ROWS/RANGE [BETWEEN] <start spec> [AND <end spec>].
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        # One side of a frame clause: UNBOUNDED | CURRENT ROW | <expr>, plus an
        # optional side keyword (one of WINDOW_SIDES, e.g. PRECEDING/FOLLOWING).
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional alias after `this`; with `explicit`, require the AS token."""
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            # Multi-alias form: expr AS (a, b, c).
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.comments
                column.comments = None

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        # Prefer a proper identifier; otherwise accept any non-reserved token
        # (or one of `tokens`) as an unquoted identifier.
        identifier = self._parse_identifier()

        if identifier:
            return
identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        # A string literal used where an identifier is expected becomes a quoted identifier.
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        # Parses a Var from a VAR token, any non-reserved token (any_token=True),
        # or one of the explicitly allowed `tokens`.
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        # Consume the current token unless it is reserved; returns it when consumed.
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        # Parameter reference, optionally brace-wrapped; a ':' separates the name
        # from a trailing attribute when present.
        self._match(TokenType.L_BRACE)
        this = self._parse_identifier() or self._parse_primary_or_var()
        expression = self._match(TokenType.COLON) and (
            self._parse_identifier() or self._parse_primary_or_var()
        )
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The sub-parser declined; undo the token we consumed.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        # SELECT * EXCEPT (a, b) / EXCEPT a
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        # SELECT * REPLACE (expr AS a, ...) / REPLACE expr
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list of items produced by `parse_method`."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Comments attached to the separator belong to the preceding item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        # Left-associative fold over the binary operators in `expressions`.
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        # Parse `( ... )`; the parentheses may be omitted when `optional` is True.
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) ->
t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [<kind>] TRANSACTION|WORK together with its mode list."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            # Each mode is a run of VAR tokens (e.g. an isolation level phrase).
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        # The COMMIT/ROLLBACK token itself was consumed by the caller (self._prev).
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            # [AND [NO] CHAIN]
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        # ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST | AFTER col]
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        # Defaults the dropped object kind to COLUMN when _parse_drop didn't set one.
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        # Caller already consumed the ADD token, hence the - 1 when saving the index.
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse the action of ALTER TABLE ... ALTER [COLUMN] <column> ..."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())

        # Fall through to [SET DATA] TYPE ... [COLLATE ...] [USING ...].
        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            # RENAME COLUMN [IF EXISTS] <old> TO <new>
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        start = self._prev

        if not self._match(TokenType.TABLE):
            # Only ALTER TABLE is structured; anything else becomes a raw Command.
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            options =
self._parse_csv(self._parse_property)

            # Only succeed when every token was consumed; otherwise fall back below.
            if not self._curr and actions:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE [INTO] <target> USING <source> ON <condition> WHEN ..."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse the WHEN [NOT] MATCHED [BY ...] [AND ...] THEN ... clauses of MERGE."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is True for BY SOURCE, False for BY TARGET or when BY is absent.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT *
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE *
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one `<name> {=|TO} <value>` assignment of a SET statement."""
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            # Leftover tokens mean the SET wasn't fully understood; fall back to Command.
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        """Match one (possibly multi-word) option from `options` into an exp.Var."""
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            # An unknown leading word, or no continuation matched.
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        # Swallow everything up to EOF and wrap it in a raw Command node.
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        # Parses <this>(<kind>[(<key> <value> ...)]) — presumably dictionary DDL
        # properties (e.g. ClickHouse); verify against callers.
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            # Only MAX given; MIN defaults to 0.
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        # Parses `<expr> IN <iterator> [IF <condition>]`; `this` is the projected
        # expression already parsed by the caller.
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            # index - 1 also gives back the token the caller consumed before us.
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        """Parse a dollar-quoted ($tag$ ... $tag$) heredoc string."""
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            # A named tag must be immediately followed by its closing "$".
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        # Scan forward until the full closing tag sequence is found.
        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk `trie` over the upcoming tokens to find a matching parser in `parsers`."""
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        # No match: restore the token position.
        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        # Returns True on a match (optionally consuming the token and attaching
        # any pending comments to `expression`), otherwise None.
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        # Like _match, but accepts any token type in `types`.
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        # Matches two consecutive token types; consumes both when advancing.
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        # Case-insensitive match of the current token's text against `texts`.
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in
texts: 5993 if self._curr and self._curr.text.upper() == text: 5994 self._advance() 5995 else: 5996 self._retreat(index) 5997 return None 5998 5999 if not advance: 6000 self._retreat(index) 6001 6002 return True 6003 6004 @t.overload 6005 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: ... 6006 6007 @t.overload 6008 def _replace_columns_with_dots( 6009 self, this: t.Optional[exp.Expression] 6010 ) -> t.Optional[exp.Expression]: ... 6011 6012 def _replace_columns_with_dots(self, this): 6013 if isinstance(this, exp.Dot): 6014 exp.replace_children(this, self._replace_columns_with_dots) 6015 elif isinstance(this, exp.Column): 6016 exp.replace_children(this, self._replace_columns_with_dots) 6017 table = this.args.get("table") 6018 this = ( 6019 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 6020 ) 6021 6022 return this 6023 6024 def _replace_lambda( 6025 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6026 ) -> t.Optional[exp.Expression]: 6027 if not node: 6028 return node 6029 6030 for column in node.find_all(exp.Column): 6031 if column.parts[0].name in lambda_variables: 6032 dot_or_id = column.to_dot() if column.table else column.this 6033 parent = column.parent 6034 6035 while isinstance(parent, exp.Dot): 6036 if not isinstance(parent.parent, exp.Dot): 6037 parent.replace(dot_or_id) 6038 break 6039 parent = parent.parent 6040 else: 6041 if column is node: 6042 node = dot_or_id 6043 else: 6044 column.replace(dot_or_id) 6045 return node 6046 6047 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6048 start = self._prev 6049 6050 # Not to be confused with TRUNCATE(number, decimals) function call 6051 if self._match(TokenType.L_PAREN): 6052 self._retreat(self._index - 2) 6053 return self._parse_function() 6054 6055 # Clickhouse supports TRUNCATE DATABASE as well 6056 is_database = self._match(TokenType.DATABASE) 6057 6058 self._match(TokenType.TABLE) 
6059 6060 exists = self._parse_exists(not_=False) 6061 6062 expressions = self._parse_csv( 6063 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6064 ) 6065 6066 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6067 6068 if self._match_text_seq("RESTART", "IDENTITY"): 6069 identity = "RESTART" 6070 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6071 identity = "CONTINUE" 6072 else: 6073 identity = None 6074 6075 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6076 option = self._prev.text 6077 else: 6078 option = None 6079 6080 partition = self._parse_partition() 6081 6082 # Fallback case 6083 if self._curr: 6084 return self._parse_as_command(start) 6085 6086 return self.expression( 6087 exp.TruncateTable, 6088 expressions=expressions, 6089 is_database=is_database, 6090 exists=exists, 6091 cluster=cluster, 6092 identity=identity, 6093 option=option, 6094 partition=partition, 6095 )
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a map expression from alternating key/value arguments.

    A lone star argument yields a StarMap; otherwise the arguments are
    consumed pairwise as key, value, key, value, ...
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = [args[i] for i in range(0, len(args), 2)]
    values = [args[i + 1] for i in range(0, len(args), 2)]

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a logarithm expression from LOG(...) arguments.

    With two arguments, returns exp.Log with base/expression ordered per the
    dialect's LOG_BASE_FIRST setting. With one argument, returns exp.Ln when
    the dialect treats single-argument LOG as the natural log, else exp.Log.
    """
    first = seq_get(args, 0)
    second = seq_get(args, 1)

    if not second:
        # Single-argument form: dialect decides between LN and LOG semantics.
        return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=first)

    # Default argument order is base, expression; swap when the dialect
    # expects the expression first.
    if dialect.LOG_BASE_FIRST:
        return exp.Log(this=first, expression=second)
    return exp.Log(this=second, expression=first)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return a builder that constructs `expr_type` from JSON-extract args.

    The builder takes (this, path, *rest); the path is converted via the
    dialect's `to_json_path`, and any extra arguments are attached as
    `expressions` — but only for exp.JSONExtract.
    """

    def _builder(args: t.List, dialect: Dialect) -> E:
        result = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        extra_args = args[2:]
        if extra_args and expr_type is exp.JSONExtract:
            result.set("expressions", extra_args)

        return result

    return _builder
86class Parser(metaclass=_Parser): 87 """ 88 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 89 90 Args: 91 error_level: The desired error level. 92 Default: ErrorLevel.IMMEDIATE 93 error_message_context: The amount of context to capture from a query string when displaying 94 the error message (in number of characters). 95 Default: 100 96 max_errors: Maximum number of error messages to include in a raised ParseError. 97 This is only relevant if error_level is ErrorLevel.RAISE. 98 Default: 3 99 """ 100 101 FUNCTIONS: t.Dict[str, t.Callable] = { 102 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 103 "CONCAT": lambda args, dialect: exp.Concat( 104 expressions=args, 105 safe=not dialect.STRICT_STRING_CONCAT, 106 coalesce=dialect.CONCAT_COALESCE, 107 ), 108 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 109 expressions=args, 110 safe=not dialect.STRICT_STRING_CONCAT, 111 coalesce=dialect.CONCAT_COALESCE, 112 ), 113 "DATE_TO_DATE_STR": lambda args: exp.Cast( 114 this=seq_get(args, 0), 115 to=exp.DataType(this=exp.DataType.Type.TEXT), 116 ), 117 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 118 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 119 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 120 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 121 "LIKE": build_like, 122 "LOG": build_logarithm, 123 "TIME_TO_TIME_STR": lambda args: exp.Cast( 124 this=seq_get(args, 0), 125 to=exp.DataType(this=exp.DataType.Type.TEXT), 126 ), 127 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 128 this=exp.Cast( 129 this=seq_get(args, 0), 130 to=exp.DataType(this=exp.DataType.Type.TEXT), 131 ), 132 start=exp.Literal.number(1), 133 length=exp.Literal.number(10), 134 ), 135 "VAR_MAP": build_var_map, 136 } 137 138 NO_PAREN_FUNCTIONS = { 139 TokenType.CURRENT_DATE: exp.CurrentDate, 140 
TokenType.CURRENT_DATETIME: exp.CurrentDate, 141 TokenType.CURRENT_TIME: exp.CurrentTime, 142 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 143 TokenType.CURRENT_USER: exp.CurrentUser, 144 } 145 146 STRUCT_TYPE_TOKENS = { 147 TokenType.NESTED, 148 TokenType.STRUCT, 149 } 150 151 NESTED_TYPE_TOKENS = { 152 TokenType.ARRAY, 153 TokenType.LOWCARDINALITY, 154 TokenType.MAP, 155 TokenType.NULLABLE, 156 *STRUCT_TYPE_TOKENS, 157 } 158 159 ENUM_TYPE_TOKENS = { 160 TokenType.ENUM, 161 TokenType.ENUM8, 162 TokenType.ENUM16, 163 } 164 165 AGGREGATE_TYPE_TOKENS = { 166 TokenType.AGGREGATEFUNCTION, 167 TokenType.SIMPLEAGGREGATEFUNCTION, 168 } 169 170 TYPE_TOKENS = { 171 TokenType.BIT, 172 TokenType.BOOLEAN, 173 TokenType.TINYINT, 174 TokenType.UTINYINT, 175 TokenType.SMALLINT, 176 TokenType.USMALLINT, 177 TokenType.INT, 178 TokenType.UINT, 179 TokenType.BIGINT, 180 TokenType.UBIGINT, 181 TokenType.INT128, 182 TokenType.UINT128, 183 TokenType.INT256, 184 TokenType.UINT256, 185 TokenType.MEDIUMINT, 186 TokenType.UMEDIUMINT, 187 TokenType.FIXEDSTRING, 188 TokenType.FLOAT, 189 TokenType.DOUBLE, 190 TokenType.CHAR, 191 TokenType.NCHAR, 192 TokenType.VARCHAR, 193 TokenType.NVARCHAR, 194 TokenType.BPCHAR, 195 TokenType.TEXT, 196 TokenType.MEDIUMTEXT, 197 TokenType.LONGTEXT, 198 TokenType.MEDIUMBLOB, 199 TokenType.LONGBLOB, 200 TokenType.BINARY, 201 TokenType.VARBINARY, 202 TokenType.JSON, 203 TokenType.JSONB, 204 TokenType.INTERVAL, 205 TokenType.TINYBLOB, 206 TokenType.TINYTEXT, 207 TokenType.TIME, 208 TokenType.TIMETZ, 209 TokenType.TIMESTAMP, 210 TokenType.TIMESTAMP_S, 211 TokenType.TIMESTAMP_MS, 212 TokenType.TIMESTAMP_NS, 213 TokenType.TIMESTAMPTZ, 214 TokenType.TIMESTAMPLTZ, 215 TokenType.DATETIME, 216 TokenType.DATETIME64, 217 TokenType.DATE, 218 TokenType.DATE32, 219 TokenType.INT4RANGE, 220 TokenType.INT4MULTIRANGE, 221 TokenType.INT8RANGE, 222 TokenType.INT8MULTIRANGE, 223 TokenType.NUMRANGE, 224 TokenType.NUMMULTIRANGE, 225 TokenType.TSRANGE, 226 
TokenType.TSMULTIRANGE, 227 TokenType.TSTZRANGE, 228 TokenType.TSTZMULTIRANGE, 229 TokenType.DATERANGE, 230 TokenType.DATEMULTIRANGE, 231 TokenType.DECIMAL, 232 TokenType.UDECIMAL, 233 TokenType.BIGDECIMAL, 234 TokenType.UUID, 235 TokenType.GEOGRAPHY, 236 TokenType.GEOMETRY, 237 TokenType.HLLSKETCH, 238 TokenType.HSTORE, 239 TokenType.PSEUDO_TYPE, 240 TokenType.SUPER, 241 TokenType.SERIAL, 242 TokenType.SMALLSERIAL, 243 TokenType.BIGSERIAL, 244 TokenType.XML, 245 TokenType.YEAR, 246 TokenType.UNIQUEIDENTIFIER, 247 TokenType.USERDEFINED, 248 TokenType.MONEY, 249 TokenType.SMALLMONEY, 250 TokenType.ROWVERSION, 251 TokenType.IMAGE, 252 TokenType.VARIANT, 253 TokenType.OBJECT, 254 TokenType.OBJECT_IDENTIFIER, 255 TokenType.INET, 256 TokenType.IPADDRESS, 257 TokenType.IPPREFIX, 258 TokenType.IPV4, 259 TokenType.IPV6, 260 TokenType.UNKNOWN, 261 TokenType.NULL, 262 *ENUM_TYPE_TOKENS, 263 *NESTED_TYPE_TOKENS, 264 *AGGREGATE_TYPE_TOKENS, 265 } 266 267 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 268 TokenType.BIGINT: TokenType.UBIGINT, 269 TokenType.INT: TokenType.UINT, 270 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 271 TokenType.SMALLINT: TokenType.USMALLINT, 272 TokenType.TINYINT: TokenType.UTINYINT, 273 TokenType.DECIMAL: TokenType.UDECIMAL, 274 } 275 276 SUBQUERY_PREDICATES = { 277 TokenType.ANY: exp.Any, 278 TokenType.ALL: exp.All, 279 TokenType.EXISTS: exp.Exists, 280 TokenType.SOME: exp.Any, 281 } 282 283 RESERVED_TOKENS = { 284 *Tokenizer.SINGLE_TOKENS.values(), 285 TokenType.SELECT, 286 } 287 288 DB_CREATABLES = { 289 TokenType.DATABASE, 290 TokenType.SCHEMA, 291 TokenType.TABLE, 292 TokenType.VIEW, 293 TokenType.MODEL, 294 TokenType.DICTIONARY, 295 TokenType.SEQUENCE, 296 TokenType.STORAGE_INTEGRATION, 297 } 298 299 CREATABLES = { 300 TokenType.COLUMN, 301 TokenType.CONSTRAINT, 302 TokenType.FUNCTION, 303 TokenType.INDEX, 304 TokenType.PROCEDURE, 305 TokenType.FOREIGN_KEY, 306 *DB_CREATABLES, 307 } 308 309 # Tokens that can represent identifiers 310 ID_VAR_TOKENS = { 311 
TokenType.VAR, 312 TokenType.ANTI, 313 TokenType.APPLY, 314 TokenType.ASC, 315 TokenType.AUTO_INCREMENT, 316 TokenType.BEGIN, 317 TokenType.BPCHAR, 318 TokenType.CACHE, 319 TokenType.CASE, 320 TokenType.COLLATE, 321 TokenType.COMMAND, 322 TokenType.COMMENT, 323 TokenType.COMMIT, 324 TokenType.CONSTRAINT, 325 TokenType.DEFAULT, 326 TokenType.DELETE, 327 TokenType.DESC, 328 TokenType.DESCRIBE, 329 TokenType.DICTIONARY, 330 TokenType.DIV, 331 TokenType.END, 332 TokenType.EXECUTE, 333 TokenType.ESCAPE, 334 TokenType.FALSE, 335 TokenType.FIRST, 336 TokenType.FILTER, 337 TokenType.FINAL, 338 TokenType.FORMAT, 339 TokenType.FULL, 340 TokenType.IS, 341 TokenType.ISNULL, 342 TokenType.INTERVAL, 343 TokenType.KEEP, 344 TokenType.KILL, 345 TokenType.LEFT, 346 TokenType.LOAD, 347 TokenType.MERGE, 348 TokenType.NATURAL, 349 TokenType.NEXT, 350 TokenType.OFFSET, 351 TokenType.OPERATOR, 352 TokenType.ORDINALITY, 353 TokenType.OVERLAPS, 354 TokenType.OVERWRITE, 355 TokenType.PARTITION, 356 TokenType.PERCENT, 357 TokenType.PIVOT, 358 TokenType.PRAGMA, 359 TokenType.RANGE, 360 TokenType.RECURSIVE, 361 TokenType.REFERENCES, 362 TokenType.REFRESH, 363 TokenType.REPLACE, 364 TokenType.RIGHT, 365 TokenType.ROW, 366 TokenType.ROWS, 367 TokenType.SEMI, 368 TokenType.SET, 369 TokenType.SETTINGS, 370 TokenType.SHOW, 371 TokenType.TEMPORARY, 372 TokenType.TOP, 373 TokenType.TRUE, 374 TokenType.TRUNCATE, 375 TokenType.UNIQUE, 376 TokenType.UNPIVOT, 377 TokenType.UPDATE, 378 TokenType.USE, 379 TokenType.VOLATILE, 380 TokenType.WINDOW, 381 *CREATABLES, 382 *SUBQUERY_PREDICATES, 383 *TYPE_TOKENS, 384 *NO_PAREN_FUNCTIONS, 385 } 386 387 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 388 389 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 390 TokenType.ANTI, 391 TokenType.APPLY, 392 TokenType.ASOF, 393 TokenType.FULL, 394 TokenType.LEFT, 395 TokenType.LOCK, 396 TokenType.NATURAL, 397 TokenType.OFFSET, 398 TokenType.RIGHT, 399 TokenType.SEMI, 400 TokenType.WINDOW, 401 } 402 403 COMMENT_TABLE_ALIAS_TOKENS = 
TABLE_ALIAS_TOKENS - {TokenType.IS} 404 405 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 406 407 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 408 409 FUNC_TOKENS = { 410 TokenType.COLLATE, 411 TokenType.COMMAND, 412 TokenType.CURRENT_DATE, 413 TokenType.CURRENT_DATETIME, 414 TokenType.CURRENT_TIMESTAMP, 415 TokenType.CURRENT_TIME, 416 TokenType.CURRENT_USER, 417 TokenType.FILTER, 418 TokenType.FIRST, 419 TokenType.FORMAT, 420 TokenType.GLOB, 421 TokenType.IDENTIFIER, 422 TokenType.INDEX, 423 TokenType.ISNULL, 424 TokenType.ILIKE, 425 TokenType.INSERT, 426 TokenType.LIKE, 427 TokenType.MERGE, 428 TokenType.OFFSET, 429 TokenType.PRIMARY_KEY, 430 TokenType.RANGE, 431 TokenType.REPLACE, 432 TokenType.RLIKE, 433 TokenType.ROW, 434 TokenType.UNNEST, 435 TokenType.VAR, 436 TokenType.LEFT, 437 TokenType.RIGHT, 438 TokenType.SEQUENCE, 439 TokenType.DATE, 440 TokenType.DATETIME, 441 TokenType.TABLE, 442 TokenType.TIMESTAMP, 443 TokenType.TIMESTAMPTZ, 444 TokenType.TRUNCATE, 445 TokenType.WINDOW, 446 TokenType.XOR, 447 *TYPE_TOKENS, 448 *SUBQUERY_PREDICATES, 449 } 450 451 CONJUNCTION = { 452 TokenType.AND: exp.And, 453 TokenType.OR: exp.Or, 454 } 455 456 EQUALITY = { 457 TokenType.COLON_EQ: exp.PropertyEQ, 458 TokenType.EQ: exp.EQ, 459 TokenType.NEQ: exp.NEQ, 460 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 461 } 462 463 COMPARISON = { 464 TokenType.GT: exp.GT, 465 TokenType.GTE: exp.GTE, 466 TokenType.LT: exp.LT, 467 TokenType.LTE: exp.LTE, 468 } 469 470 BITWISE = { 471 TokenType.AMP: exp.BitwiseAnd, 472 TokenType.CARET: exp.BitwiseXor, 473 TokenType.PIPE: exp.BitwiseOr, 474 } 475 476 TERM = { 477 TokenType.DASH: exp.Sub, 478 TokenType.PLUS: exp.Add, 479 TokenType.MOD: exp.Mod, 480 TokenType.COLLATE: exp.Collate, 481 } 482 483 FACTOR = { 484 TokenType.DIV: exp.IntDiv, 485 TokenType.LR_ARROW: exp.Distance, 486 TokenType.SLASH: exp.Div, 487 TokenType.STAR: exp.Mul, 488 } 489 490 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 491 492 TIMES = { 493 
TokenType.TIME, 494 TokenType.TIMETZ, 495 } 496 497 TIMESTAMPS = { 498 TokenType.TIMESTAMP, 499 TokenType.TIMESTAMPTZ, 500 TokenType.TIMESTAMPLTZ, 501 *TIMES, 502 } 503 504 SET_OPERATIONS = { 505 TokenType.UNION, 506 TokenType.INTERSECT, 507 TokenType.EXCEPT, 508 } 509 510 JOIN_METHODS = { 511 TokenType.NATURAL, 512 TokenType.ASOF, 513 } 514 515 JOIN_SIDES = { 516 TokenType.LEFT, 517 TokenType.RIGHT, 518 TokenType.FULL, 519 } 520 521 JOIN_KINDS = { 522 TokenType.INNER, 523 TokenType.OUTER, 524 TokenType.CROSS, 525 TokenType.SEMI, 526 TokenType.ANTI, 527 } 528 529 JOIN_HINTS: t.Set[str] = set() 530 531 LAMBDAS = { 532 TokenType.ARROW: lambda self, expressions: self.expression( 533 exp.Lambda, 534 this=self._replace_lambda( 535 self._parse_conjunction(), 536 {node.name for node in expressions}, 537 ), 538 expressions=expressions, 539 ), 540 TokenType.FARROW: lambda self, expressions: self.expression( 541 exp.Kwarg, 542 this=exp.var(expressions[0].name), 543 expression=self._parse_conjunction(), 544 ), 545 } 546 547 COLUMN_OPERATORS = { 548 TokenType.DOT: None, 549 TokenType.DCOLON: lambda self, this, to: self.expression( 550 exp.Cast if self.STRICT_CAST else exp.TryCast, 551 this=this, 552 to=to, 553 ), 554 TokenType.ARROW: lambda self, this, path: self.expression( 555 exp.JSONExtract, 556 this=this, 557 expression=self.dialect.to_json_path(path), 558 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 559 ), 560 TokenType.DARROW: lambda self, this, path: self.expression( 561 exp.JSONExtractScalar, 562 this=this, 563 expression=self.dialect.to_json_path(path), 564 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 565 ), 566 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 567 exp.JSONBExtract, 568 this=this, 569 expression=path, 570 ), 571 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 572 exp.JSONBExtractScalar, 573 this=this, 574 expression=path, 575 ), 576 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 577 
exp.JSONBContains, 578 this=this, 579 expression=key, 580 ), 581 } 582 583 EXPRESSION_PARSERS = { 584 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 585 exp.Column: lambda self: self._parse_column(), 586 exp.Condition: lambda self: self._parse_conjunction(), 587 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 588 exp.Expression: lambda self: self._parse_expression(), 589 exp.From: lambda self: self._parse_from(), 590 exp.Group: lambda self: self._parse_group(), 591 exp.Having: lambda self: self._parse_having(), 592 exp.Identifier: lambda self: self._parse_id_var(), 593 exp.Join: lambda self: self._parse_join(), 594 exp.Lambda: lambda self: self._parse_lambda(), 595 exp.Lateral: lambda self: self._parse_lateral(), 596 exp.Limit: lambda self: self._parse_limit(), 597 exp.Offset: lambda self: self._parse_offset(), 598 exp.Order: lambda self: self._parse_order(), 599 exp.Ordered: lambda self: self._parse_ordered(), 600 exp.Properties: lambda self: self._parse_properties(), 601 exp.Qualify: lambda self: self._parse_qualify(), 602 exp.Returning: lambda self: self._parse_returning(), 603 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 604 exp.Table: lambda self: self._parse_table_parts(), 605 exp.TableAlias: lambda self: self._parse_table_alias(), 606 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 607 exp.Where: lambda self: self._parse_where(), 608 exp.Window: lambda self: self._parse_named_window(), 609 exp.With: lambda self: self._parse_with(), 610 "JOIN_TYPE": lambda self: self._parse_join_parts(), 611 } 612 613 STATEMENT_PARSERS = { 614 TokenType.ALTER: lambda self: self._parse_alter(), 615 TokenType.BEGIN: lambda self: self._parse_transaction(), 616 TokenType.CACHE: lambda self: self._parse_cache(), 617 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 618 TokenType.COMMENT: lambda self: self._parse_comment(), 619 TokenType.CREATE: lambda self: 
self._parse_create(), 620 TokenType.DELETE: lambda self: self._parse_delete(), 621 TokenType.DESC: lambda self: self._parse_describe(), 622 TokenType.DESCRIBE: lambda self: self._parse_describe(), 623 TokenType.DROP: lambda self: self._parse_drop(), 624 TokenType.INSERT: lambda self: self._parse_insert(), 625 TokenType.KILL: lambda self: self._parse_kill(), 626 TokenType.LOAD: lambda self: self._parse_load(), 627 TokenType.MERGE: lambda self: self._parse_merge(), 628 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 629 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 630 TokenType.REFRESH: lambda self: self._parse_refresh(), 631 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 632 TokenType.SET: lambda self: self._parse_set(), 633 TokenType.UNCACHE: lambda self: self._parse_uncache(), 634 TokenType.UPDATE: lambda self: self._parse_update(), 635 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 636 TokenType.USE: lambda self: self.expression( 637 exp.Use, 638 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 639 this=self._parse_table(schema=False), 640 ), 641 } 642 643 UNARY_PARSERS = { 644 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 645 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 646 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 647 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 648 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 649 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 650 } 651 652 STRING_PARSERS = { 653 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 654 exp.RawString, this=token.text 655 ), 656 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 657 exp.National, this=token.text 658 ), 
659 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 660 TokenType.STRING: lambda self, token: self.expression( 661 exp.Literal, this=token.text, is_string=True 662 ), 663 TokenType.UNICODE_STRING: lambda self, token: self.expression( 664 exp.UnicodeString, 665 this=token.text, 666 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 667 ), 668 } 669 670 NUMERIC_PARSERS = { 671 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 672 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 673 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 674 TokenType.NUMBER: lambda self, token: self.expression( 675 exp.Literal, this=token.text, is_string=False 676 ), 677 } 678 679 PRIMARY_PARSERS = { 680 **STRING_PARSERS, 681 **NUMERIC_PARSERS, 682 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 683 TokenType.NULL: lambda self, _: self.expression(exp.Null), 684 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 685 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 686 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 687 TokenType.STAR: lambda self, _: self.expression( 688 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 689 ), 690 } 691 692 PLACEHOLDER_PARSERS = { 693 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 694 TokenType.PARAMETER: lambda self: self._parse_parameter(), 695 TokenType.COLON: lambda self: ( 696 self.expression(exp.Placeholder, this=self._prev.text) 697 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 698 else None 699 ), 700 } 701 702 RANGE_PARSERS = { 703 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 704 TokenType.GLOB: binary_range_parser(exp.Glob), 705 TokenType.ILIKE: 
binary_range_parser(exp.ILike), 706 TokenType.IN: lambda self, this: self._parse_in(this), 707 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 708 TokenType.IS: lambda self, this: self._parse_is(this), 709 TokenType.LIKE: binary_range_parser(exp.Like), 710 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 711 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 712 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 713 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 714 } 715 716 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 717 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 718 "AUTO": lambda self: self._parse_auto_property(), 719 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 720 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 721 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 722 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 723 "CHECKSUM": lambda self: self._parse_checksum(), 724 "CLUSTER BY": lambda self: self._parse_cluster(), 725 "CLUSTERED": lambda self: self._parse_clustered_by(), 726 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 727 exp.CollateProperty, **kwargs 728 ), 729 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 730 "CONTAINS": lambda self: self._parse_contains_property(), 731 "COPY": lambda self: self._parse_copy_property(), 732 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 733 "DEFINER": lambda self: self._parse_definer(), 734 "DETERMINISTIC": lambda self: self.expression( 735 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 736 ), 737 "DISTKEY": lambda self: self._parse_distkey(), 738 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 739 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 740 "EXECUTE": 
lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 741 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 742 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 743 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 744 "FREESPACE": lambda self: self._parse_freespace(), 745 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 746 "HEAP": lambda self: self.expression(exp.HeapProperty), 747 "IMMUTABLE": lambda self: self.expression( 748 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 749 ), 750 "INHERITS": lambda self: self.expression( 751 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 752 ), 753 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 754 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 755 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 756 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 757 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 758 "LIKE": lambda self: self._parse_create_like(), 759 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 760 "LOCK": lambda self: self._parse_locking(), 761 "LOCKING": lambda self: self._parse_locking(), 762 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 763 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 764 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 765 "MODIFIES": lambda self: self._parse_modifies_property(), 766 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 767 "NO": lambda self: self._parse_no_property(), 768 "ON": lambda self: self._parse_on_property(), 769 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 770 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 771 "PARTITION": lambda self: 
            self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Keyword -> parser for column/table constraints (NOT NULL, DEFAULT, CHECK, ...).
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_conjunction),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON UPDATE <function> if an UPDATE token follows, otherwise a plain ON property.
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    # Keyword -> parser for ALTER TABLE actions.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraints that may appear unnamed in a schema definition.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"}

    # Keyword -> parser for functions that are not followed by parentheses (e.g. CASE ... END).
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    # Tokens that cannot serve as a function name.
    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }
FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 895 896 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 897 898 FUNCTION_PARSERS = { 899 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 900 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 901 "DECODE": lambda self: self._parse_decode(), 902 "EXTRACT": lambda self: self._parse_extract(), 903 "JSON_OBJECT": lambda self: self._parse_json_object(), 904 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 905 "JSON_TABLE": lambda self: self._parse_json_table(), 906 "MATCH": lambda self: self._parse_match_against(), 907 "OPENJSON": lambda self: self._parse_open_json(), 908 "POSITION": lambda self: self._parse_position(), 909 "PREDICT": lambda self: self._parse_predict(), 910 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 911 "STRING_AGG": lambda self: self._parse_string_agg(), 912 "SUBSTRING": lambda self: self._parse_substring(), 913 "TRIM": lambda self: self._parse_trim(), 914 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 915 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 916 } 917 918 QUERY_MODIFIER_PARSERS = { 919 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 920 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 921 TokenType.WHERE: lambda self: ("where", self._parse_where()), 922 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 923 TokenType.HAVING: lambda self: ("having", self._parse_having()), 924 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 925 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 926 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 927 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 928 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 929 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 930 TokenType.FOR: lambda self: 
("locks", self._parse_locks()), 931 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 932 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 933 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 934 TokenType.CLUSTER_BY: lambda self: ( 935 "cluster", 936 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 937 ), 938 TokenType.DISTRIBUTE_BY: lambda self: ( 939 "distribute", 940 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 941 ), 942 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 943 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 944 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 945 } 946 947 SET_PARSERS = { 948 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 949 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 950 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 951 "TRANSACTION": lambda self: self._parse_set_transaction(), 952 } 953 954 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 955 956 TYPE_LITERAL_PARSERS = { 957 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 958 } 959 960 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 961 962 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 963 964 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 965 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 966 "ISOLATION": ( 967 ("LEVEL", "REPEATABLE", "READ"), 968 ("LEVEL", "READ", "COMMITTED"), 969 ("LEVEL", "READ", "UNCOMITTED"), 970 ("LEVEL", "SERIALIZABLE"), 971 ), 972 "READ": ("WRITE", "ONLY"), 973 } 974 975 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 976 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 977 ) 978 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 979 980 CREATE_SEQUENCE: 
OPTIONS_TYPE = { 981 "SCALE": ("EXTEND", "NOEXTEND"), 982 "SHARD": ("EXTEND", "NOEXTEND"), 983 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 984 **dict.fromkeys( 985 ( 986 "SESSION", 987 "GLOBAL", 988 "KEEP", 989 "NOKEEP", 990 "ORDER", 991 "NOORDER", 992 "NOCACHE", 993 "CYCLE", 994 "NOCYCLE", 995 "NOMINVALUE", 996 "NOMAXVALUE", 997 "NOSCALE", 998 "NOSHARD", 999 ), 1000 tuple(), 1001 ), 1002 } 1003 1004 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1005 1006 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1007 1008 CLONE_KEYWORDS = {"CLONE", "COPY"} 1009 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1010 1011 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 1012 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1013 1014 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1015 1016 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1017 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1018 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1019 1020 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1021 1022 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1023 1024 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 1025 1026 DISTINCT_TOKENS = {TokenType.DISTINCT} 1027 1028 NULL_TOKENS = {TokenType.NULL} 1029 1030 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1031 1032 STRICT_CAST = True 1033 1034 PREFIXED_PIVOT_COLUMNS = False 1035 IDENTIFY_PIVOT_STRINGS = False 1036 1037 LOG_DEFAULTS_TO_LN = False 1038 1039 # Whether ADD is present for each column added by ALTER TABLE 1040 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1041 1042 # Whether the table sample clause expects CSV syntax 1043 TABLESAMPLE_CSV = False 1044 1045 # Whether the SET command needs a delimiter (e.g. 
"=") for assignments 1046 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1047 1048 # Whether the TRIM function expects the characters to trim as its first argument 1049 TRIM_PATTERN_FIRST = False 1050 1051 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1052 STRING_ALIASES = False 1053 1054 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1055 MODIFIERS_ATTACHED_TO_UNION = True 1056 UNION_MODIFIERS = {"order", "limit", "offset"} 1057 1058 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1059 NO_PAREN_IF_COMMANDS = True 1060 1061 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1062 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1063 1064 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1065 # If this is True and '(' is not found, the keyword will be treated as an identifier 1066 VALUES_FOLLOWED_BY_PAREN = True 1067 1068 # Whether implicit unnesting is supported, e.g. 
    # SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        # Imported here rather than at module level, presumably to avoid a
        # circular import — TODO confirm.
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self) -> None:
        """Clears all parsing state so the same Parser instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.

        Raises:
            TypeError: If no parser is registered for a requested expression type.
            ParseError: If none of the requested types could be parsed.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Splits the token stream into semicolon-delimited chunks and runs
        # `parse_method` once per chunk, error-checking after each one.
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon doesn't open a new (empty) chunk.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement wasn't fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Attaches any comments collected from the previous token, then clears them.
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # The slice of the original SQL spanned by the two tokens, inclusive.
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        # True when the previous and current tokens are adjacent in the input text.
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        # Moves the token cursor `times` steps forward and refreshes the
        # _curr/_next/_prev lookahead state.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Moves the cursor back (or forward) to an absolute token index.
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        # Fallback path: wrap the remainder of the statement in an opaque Command node.
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # Parses COMMENT [IF EXISTS] ON <kind> <name> IS <string>.
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # Parses one TTL entry: an expression optionally followed by an action.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        # Entry point for a single statement within a chunk: try the registered
        # statement parsers, then commands, then fall back to a bare expression/select.
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            # Unknown target kind: preserve the statement as an opaque command.
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            ),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; truthy only when the full sequence matched.
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at the various syntactic locations.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr:
            # Unconsumed tokens remain: fall back to an opaque command.
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        # Parses CREATE SEQUENCE options; returns None if nothing was consumed.
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the
default 1605 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1606 else: 1607 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1608 if opt: 1609 options.append(opt) 1610 else: 1611 break 1612 1613 seq.set("options", options if options else None) 1614 return None if self._index == index else seq 1615 1616 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1617 # only used for teradata currently 1618 self._match(TokenType.COMMA) 1619 1620 kwargs = { 1621 "no": self._match_text_seq("NO"), 1622 "dual": self._match_text_seq("DUAL"), 1623 "before": self._match_text_seq("BEFORE"), 1624 "default": self._match_text_seq("DEFAULT"), 1625 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1626 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1627 "after": self._match_text_seq("AFTER"), 1628 "minimum": self._match_texts(("MIN", "MINIMUM")), 1629 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1630 } 1631 1632 if self._match_texts(self.PROPERTY_PARSERS): 1633 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1634 try: 1635 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1636 except TypeError: 1637 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1638 1639 return None 1640 1641 def _parse_property(self) -> t.Optional[exp.Expression]: 1642 if self._match_texts(self.PROPERTY_PARSERS): 1643 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1644 1645 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1646 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1647 1648 if self._match_text_seq("COMPOUND", "SORTKEY"): 1649 return self._parse_sortkey(compound=True) 1650 1651 if self._match_text_seq("SQL", "SECURITY"): 1652 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1653 1654 index = self._index 1655 key = self._parse_column() 1656 1657 if not 
self._match(TokenType.EQ): 1658 self._retreat(index) 1659 return self._parse_sequence_properties() 1660 1661 return self.expression( 1662 exp.Property, 1663 this=key.to_dot() if isinstance(key, exp.Column) else key, 1664 value=self._parse_column() or self._parse_var(any_token=True), 1665 ) 1666 1667 def _parse_stored(self) -> exp.FileFormatProperty: 1668 self._match(TokenType.ALIAS) 1669 1670 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1671 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1672 1673 return self.expression( 1674 exp.FileFormatProperty, 1675 this=( 1676 self.expression( 1677 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1678 ) 1679 if input_format or output_format 1680 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1681 ), 1682 ) 1683 1684 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1685 self._match(TokenType.EQ) 1686 self._match(TokenType.ALIAS) 1687 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1688 1689 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1690 properties = [] 1691 while True: 1692 if before: 1693 prop = self._parse_property_before() 1694 else: 1695 prop = self._parse_property() 1696 if not prop: 1697 break 1698 for p in ensure_list(prop): 1699 properties.append(p) 1700 1701 if properties: 1702 return self.expression(exp.Properties, expressions=properties) 1703 1704 return None 1705 1706 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1707 return self.expression( 1708 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1709 ) 1710 1711 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1712 if self._index >= 2: 1713 pre_volatile_token = self._tokens[self._index - 2] 1714 else: 1715 pre_volatile_token = None 1716 
1717 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1718 return exp.VolatileProperty() 1719 1720 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1721 1722 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1723 self._match_pair(TokenType.EQ, TokenType.ON) 1724 1725 prop = self.expression(exp.WithSystemVersioningProperty) 1726 if self._match(TokenType.L_PAREN): 1727 self._match_text_seq("HISTORY_TABLE", "=") 1728 prop.set("this", self._parse_table_parts()) 1729 1730 if self._match(TokenType.COMMA): 1731 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1732 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1733 1734 self._match_r_paren() 1735 1736 return prop 1737 1738 def _parse_with_property( 1739 self, 1740 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1741 if self._match(TokenType.L_PAREN, advance=False): 1742 return self._parse_wrapped_csv(self._parse_property) 1743 1744 if self._match_text_seq("JOURNAL"): 1745 return self._parse_withjournaltable() 1746 1747 if self._match_text_seq("DATA"): 1748 return self._parse_withdata(no=False) 1749 elif self._match_text_seq("NO", "DATA"): 1750 return self._parse_withdata(no=True) 1751 1752 if not self._next: 1753 return None 1754 1755 return self._parse_withisolatedloading() 1756 1757 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1758 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1759 self._match(TokenType.EQ) 1760 1761 user = self._parse_id_var() 1762 self._match(TokenType.PARAMETER) 1763 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1764 1765 if not user or not host: 1766 return None 1767 1768 return exp.DefinerProperty(this=f"{user}@{host}") 1769 1770 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1771 self._match(TokenType.TABLE) 1772 self._match(TokenType.EQ) 1773 return 
self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        """Parse [NO] LOG."""
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Wrap pre-matched journal flags into a JournalProperty."""
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parse CHECKSUM = ON | OFF | DEFAULT."""
        self._match(TokenType.EQ)

        # `on` stays None when neither ON nor OFF was given.
        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        """Parse a CLUSTER BY list; *wrapped* expects parentheses around it."""
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse CLUSTERED BY (...) [SORTED BY (...)] INTO <n> BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        """Parse COPY GRANTS; backtracks over COPY when GRANTS does not follow."""
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parse FREESPACE = <n> [PERCENT]."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(),
percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parse [NO|DEFAULT] MERGEBLOCKRATIO [= <n> [PERCENT]]."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parse DATABLOCKSIZE = <n> [BYTES|KBYTES|KILOBYTES]."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse BLOCKCOMPRESSION = ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP(...)]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        """Parse WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a Teradata LOCKING modifier: kind, target, FOR/IN, lock type, OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects carry a table reference; ROW locks do not.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parse PARTITION BY <exprs>; returns [] when the clause is absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a Postgres partition bound: IN (...), FROM ... TO ..., or WITH (MODULUS, REMAINDER)."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE/MAXVALUE are keywords inside bound lists, not expressions.
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression |
t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parse PARTITION OF <table> { DEFAULT | FOR VALUES <bound spec> }."""
        if not self._match_text_seq("OF"):
            # Back out of the already-consumed PARTITION-related token.
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse PARTITIONED BY <schema or bracketed field>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse WITH [NO] DATA [AND [NO] STATISTICS]."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif
self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse CONTAINS SQL."""
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse MODIFIES SQL DATA."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        """Parse NO-prefixed properties: NO PRIMARY INDEX, NO SQL."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse ON COMMIT PRESERVE|DELETE ROWS, else a generic ON property."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse READS SQL DATA."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse DISTKEY(<id>)."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse LIKE <table> [INCLUDING|EXCLUDING <option> ...]."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse [COMPOUND] SORTKEY(<ids>)."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse CHARACTER SET [=] <name>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse REMOTE WITH CONNECTION <table parts>."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: scalar type, TABLE<...>, or TABLE (schema)."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        """Parse DESCRIBE [<kind>] [EXTENDED] <table> [<properties>]."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        extended = self._match_text_seq("EXTENDED")
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions
        )

    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement, including OVERWRITE/IGNORE/DIRECTORY variants."""
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # Hive: INSERT OVERWRITE [LOCAL] DIRECTORY '<path>' [ROW FORMAT ...]
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. INSERT OR REPLACE / OR IGNORE ...
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            # RETURNING may appear either before or after the source expression.
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse KILL [CONNECTION|QUERY] <id>."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT ... / ON DUPLICATE KEY ... clauses."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse RETURNING <exprs> [INTO <target>]."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a row format after the ROW keyword has already been consumed."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ROW FORMAT SERDE '<class>' or ROW FORMAT DELIMITED ... (Hive)."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED",
"BY"): 2256 kwargs["escaped"] = self._parse_string() 2257 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2258 kwargs["collection_items"] = self._parse_string() 2259 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2260 kwargs["map_keys"] = self._parse_string() 2261 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2262 kwargs["lines"] = self._parse_string() 2263 if self._match_text_seq("NULL", "DEFINED", "AS"): 2264 kwargs["null"] = self._parse_string() 2265 2266 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2267 2268 def _parse_load(self) -> exp.LoadData | exp.Command: 2269 if self._match_text_seq("DATA"): 2270 local = self._match_text_seq("LOCAL") 2271 self._match_text_seq("INPATH") 2272 inpath = self._parse_string() 2273 overwrite = self._match(TokenType.OVERWRITE) 2274 self._match_pair(TokenType.INTO, TokenType.TABLE) 2275 2276 return self.expression( 2277 exp.LoadData, 2278 this=self._parse_table(schema=True), 2279 local=local, 2280 overwrite=overwrite, 2281 inpath=inpath, 2282 partition=self._parse_partition(), 2283 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2284 serde=self._match_text_seq("SERDE") and self._parse_string(), 2285 ) 2286 return self._parse_as_command(self._prev) 2287 2288 def _parse_delete(self) -> exp.Delete: 2289 # This handles MySQL's "Multiple-Table Syntax" 2290 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2291 tables = None 2292 comments = self._prev_comments 2293 if not self._match(TokenType.FROM, advance=False): 2294 tables = self._parse_csv(self._parse_table) or None 2295 2296 returning = self._parse_returning() 2297 2298 return self.expression( 2299 exp.Delete, 2300 comments=comments, 2301 tables=tables, 2302 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2303 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2304 where=self._parse_where(), 2305 returning=returning or 
self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table>."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse PARTITION (<exprs>)."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse one VALUES row, parenthesized or bare."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        return self.expression(exp.Tuple, expressions=[self._parse_expression()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list."""
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT statement (or a parenthesized/VALUES/FROM-first equivalent)."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                # NOTE(review): raise_error may be a no-op under lenient error
                # levels, hence the fallback returns below.
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            # e.g. BigQuery's SELECT AS STRUCT / AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            # Bare FROM with no SELECT: treat as SELECT * FROM ...
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse WITH [RECURSIVE] <cte> [, <cte> ...]."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH between CTEs.
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse one CTE: <alias> AS (<statement>)."""
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias
or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse [AS] <alias> [(<columns>)]."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Backtrack if the parenthesis did not actually open a column list.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap *this* in a Subquery with pivots and an optional alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite implicit unnest-style joins into explicit UNNEST() calls."""
        from sqlglot.optimizer.normalize_identifiers import (
            normalize_identifiers as _norm,
        )

        # Names the FROM clause (and prior joins) put in scope, normalized.
        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])
# Table.to_column creates a parent Alias node that we want to convert to 2565 # a TableAlias and attach to the Unnest, so it matches the parser's output 2566 if isinstance(table.args.get("alias"), exp.TableAlias): 2567 table_as_column.replace(table_as_column.this) 2568 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2569 2570 table.replace(unnest) 2571 2572 refs.add(normalized_table.alias_or_name) 2573 2574 return this 2575 2576 def _parse_query_modifiers( 2577 self, this: t.Optional[exp.Expression] 2578 ) -> t.Optional[exp.Expression]: 2579 if isinstance(this, (exp.Query, exp.Table)): 2580 for join in iter(self._parse_join, None): 2581 this.append("joins", join) 2582 for lateral in iter(self._parse_lateral, None): 2583 this.append("laterals", lateral) 2584 2585 while True: 2586 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2587 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2588 key, expression = parser(self) 2589 2590 if expression: 2591 this.set(key, expression) 2592 if key == "limit": 2593 offset = expression.args.pop("offset", None) 2594 2595 if offset: 2596 offset = exp.Offset(expression=offset) 2597 this.set("offset", offset) 2598 2599 limit_by_expressions = expression.expressions 2600 expression.set("expressions", None) 2601 offset.set("expressions", limit_by_expressions) 2602 continue 2603 break 2604 2605 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2606 this = self._implicit_unnests_to_explicit(this) 2607 2608 return this 2609 2610 def _parse_hint(self) -> t.Optional[exp.Hint]: 2611 if self._match(TokenType.HINT): 2612 hints = [] 2613 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2614 hints.extend(hint) 2615 2616 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2617 self.raise_error("Expected */ after HINT") 2618 2619 return self.expression(exp.Hint, expressions=hints) 2620 2621 return None 2622 2623 def _parse_into(self) -> t.Optional[exp.Into]: 
        """Parse SELECT ... INTO [TEMPORARY|UNLOGGED] [TABLE] <table>."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST
{self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # Scan raw tokens until the PATTERN's parentheses balance out.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            # Keep the pattern as the raw SQL text between start and end.
            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY."""
        # cross_apply: True for CROSS APPLY, False for OUTER APPLY, None for LATERAL.
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: parse an UNNEST, a function call, or a dotted name.
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table =
self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Consume the optional (method, side, kind) join modifier tokens."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse a JOIN clause, including comma joins and APPLY forms."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # Modifiers without a JOIN keyword: undo and drop them.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
kwargs["kind"] = kind.text 2818 if hint: 2819 kwargs["hint"] = hint 2820 2821 if self._match(TokenType.ON): 2822 kwargs["on"] = self._parse_conjunction() 2823 elif self._match(TokenType.USING): 2824 kwargs["using"] = self._parse_wrapped_id_vars() 2825 elif not (kind and kind.token_type == TokenType.CROSS): 2826 index = self._index 2827 join = self._parse_join() 2828 2829 if join and self._match(TokenType.ON): 2830 kwargs["on"] = self._parse_conjunction() 2831 elif join and self._match(TokenType.USING): 2832 kwargs["using"] = self._parse_wrapped_id_vars() 2833 else: 2834 join = None 2835 self._retreat(index) 2836 2837 kwargs["this"].set("joins", [join] if join else None) 2838 2839 comments = [c for token in (method, side, kind) if token for c in token.comments] 2840 return self.expression(exp.Join, comments=comments, **kwargs) 2841 2842 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2843 this = self._parse_conjunction() 2844 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2845 return this 2846 2847 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2848 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2849 2850 return this 2851 2852 def _parse_index( 2853 self, 2854 index: t.Optional[exp.Expression] = None, 2855 ) -> t.Optional[exp.Index]: 2856 if index: 2857 unique = None 2858 primary = None 2859 amp = None 2860 2861 self._match(TokenType.ON) 2862 self._match(TokenType.TABLE) # hive 2863 table = self._parse_table_parts(schema=True) 2864 else: 2865 unique = self._match(TokenType.UNIQUE) 2866 primary = self._match_text_seq("PRIMARY") 2867 amp = self._match_text_seq("AMP") 2868 2869 if not self._match(TokenType.INDEX): 2870 return None 2871 2872 index = self._parse_id_var() 2873 table = None 2874 2875 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2876 2877 if self._match(TokenType.L_PAREN, advance=False): 2878 columns = self._parse_wrapped_csv(lambda: 
self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            include=include,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse table hints: either a T-SQL WITH (...) hint list or MySQL-style
        index hints. Returns None when no hints are present."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name (function call, identifier,
        quoted string or placeholder)."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a (possibly dotted) [catalog.][db.]table reference into exp.Table.

        With `is_db_reference`, the last component is treated as the database
        rather than the table. With `wildcard`, a trailing `*` is folded into
        the table identifier.
        """
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a FROM-clause table factor: lateral, unnest, VALUES, subquery
        or a plain table reference, plus its alias/hints/pivots/sample."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
exp.Expression,
            bracket
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Dialect controls whether the alias comes before or after TABLESAMPLE.
        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse a time-travel clause (TIMESTAMP/VERSION snapshot) into
        exp.Version; kind is one of FROM/BETWEEN, CONTAINED IN, ALL or AS OF."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse UNNEST(...) with optional alias and WITH ORDINALITY/OFFSET."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                # In these dialects the alias names the column, not the table.
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                # The last column alias names the ordinality column.
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        """Parse a VALUES list, optionally parenthesized as a derived table."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match_text_seq("VALUES"):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
self._match_r_paren() 3134 3135 return self.expression( 3136 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3137 ) 3138 3139 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3140 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3141 as_modifier and self._match_text_seq("USING", "SAMPLE") 3142 ): 3143 return None 3144 3145 bucket_numerator = None 3146 bucket_denominator = None 3147 bucket_field = None 3148 percent = None 3149 size = None 3150 seed = None 3151 3152 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3153 matched_l_paren = self._match(TokenType.L_PAREN) 3154 3155 if self.TABLESAMPLE_CSV: 3156 num = None 3157 expressions = self._parse_csv(self._parse_primary) 3158 else: 3159 expressions = None 3160 num = ( 3161 self._parse_factor() 3162 if self._match(TokenType.NUMBER, advance=False) 3163 else self._parse_primary() or self._parse_placeholder() 3164 ) 3165 3166 if self._match_text_seq("BUCKET"): 3167 bucket_numerator = self._parse_number() 3168 self._match_text_seq("OUT", "OF") 3169 bucket_denominator = bucket_denominator = self._parse_number() 3170 self._match(TokenType.ON) 3171 bucket_field = self._parse_field() 3172 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3173 percent = num 3174 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3175 size = num 3176 else: 3177 percent = num 3178 3179 if matched_l_paren: 3180 self._match_r_paren() 3181 3182 if self._match(TokenType.L_PAREN): 3183 method = self._parse_var(upper=True) 3184 seed = self._match(TokenType.COMMA) and self._parse_number() 3185 self._match_r_paren() 3186 elif self._match_texts(("SEED", "REPEATABLE")): 3187 seed = self._parse_wrapped(self._parse_number) 3188 3189 return self.expression( 3190 exp.TableSample, 3191 expressions=expressions, 3192 method=method, 3193 bucket_numerator=bucket_numerator, 3194 bucket_denominator=bucket_denominator, 3195 
bucket_field=bucket_field, 3196 percent=percent, 3197 size=size, 3198 seed=seed, 3199 ) 3200 3201 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3202 return list(iter(self._parse_pivot, None)) or None 3203 3204 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 3205 return list(iter(self._parse_join, None)) or None 3206 3207 # https://duckdb.org/docs/sql/statements/pivot 3208 def _parse_simplified_pivot(self) -> exp.Pivot: 3209 def _parse_on() -> t.Optional[exp.Expression]: 3210 this = self._parse_bitwise() 3211 return self._parse_in(this) if self._match(TokenType.IN) else this 3212 3213 this = self._parse_table() 3214 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3215 using = self._match(TokenType.USING) and self._parse_csv( 3216 lambda: self._parse_alias(self._parse_function()) 3217 ) 3218 group = self._parse_group() 3219 return self.expression( 3220 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3221 ) 3222 3223 def _parse_pivot_in(self) -> exp.In: 3224 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3225 this = self._parse_conjunction() 3226 3227 self._match(TokenType.ALIAS) 3228 alias = self._parse_field() 3229 if alias: 3230 return self.expression(exp.PivotAlias, this=this, alias=alias) 3231 3232 return this 3233 3234 value = self._parse_column() 3235 3236 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3237 self.raise_error("Expecting IN (") 3238 3239 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3240 3241 self._match_r_paren() 3242 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3243 3244 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3245 index = self._index 3246 include_nulls = None 3247 3248 if self._match(TokenType.PIVOT): 3249 unpivot = False 3250 elif self._match(TokenType.UNPIVOT): 3251 unpivot = True 3252 3253 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3254 
if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names of the pivoted relation.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each aggregation; dialects may override this."""
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        """Parse a PREWHERE clause into exp.PreWhere."""
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause into exp.Where."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY, including ALL, GROUPING SETS, ROLLUP, CUBE and
        WITH TOTALS elements, looping until no more grouping elements follow."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # WITH ROLLUP has no column list; bare ROLLUP takes one.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    # The WITH belonged to something else — give it back.
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse GROUPING SETS (...) into its list of grouping sets."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized tuple or a column."""
        if
self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause into exp.Having."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause into exp.Qualify."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse START WITH ... CONNECT BY (in either order) into exp.Connect.

        PRIOR is registered as a temporary no-paren function parser only while
        the CONNECT BY condition is being parsed.
        """
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_name_as_expression(self) -> exp.Alias:
        """Parse `name AS expr` (alias first) into exp.Alias."""
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse an INTERPOLATE (...) list, used with ORDER BY ... WITH FILL."""
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY (or ORDER SIBLINGS BY) into exp.Order; returns `this`
        unchanged when no order clause follows."""
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Generic sort-clause parser (e.g. SORT BY / CLUSTER BY variants)."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse one ordering term with ASC/DESC, NULLS FIRST/LAST and WITH FILL."""
        this = parse_method() if parse_method else self._parse_conjunction()
        if not this:
            return None

        asc = self._match(TokenType.ASC)
        # NOTE: `(asc and False)` is deliberate — it makes desc False (explicit
        # ASC) rather than None (no ordering token seen at all).
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # Apply the dialect's default null ordering when none was specified.
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top:
bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when `top`) or a FETCH FIRST/NEXT clause;
        returns `this` unchanged when neither follows."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL-style `LIMIT offset, count`.
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause; returns `this` unchanged when absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse the BY expressions of a ClickHouse-style LIMIT/OFFSET ... BY."""
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse zero or more locking clauses (FOR UPDATE / FOR SHARE /
        LOCK IN SHARE MODE) with optional OF tables and wait behavior."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait: True = NOWAIT, expression = WAIT <n>, False = SKIP LOCKED.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold trailing UNION/EXCEPT/INTERSECT clauses onto `this`,
        left-associatively."""
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            # DISTINCT is the default unless ALL was given explicitly.
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            expression = this.expression

            if expression:
                # Hoist trailing modifiers (e.g. ORDER BY/LIMIT) from the last
                # select up to the union itself.
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def
_parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full (possibly aliased) scalar expression."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR conjunctions over equalities."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality operators over comparisons."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison operators over range expressions."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN/IN/LIKE via RANGE_PARSERS),
        ISNULL/NOTNULL shorthands and IS tests, honoring a leading NOT."""
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS test: [NOT] DISTINCT FROM, NULL or a boolean.

        Retreats and returns None when what follows IS is none of these.
        """
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of IN: an UNNEST, a parenthesized/bracketed
        list or subquery, or a bare field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def
_parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        """Parse `BETWEEN low AND high` into exp.Between."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in exp.Escape when an ESCAPE clause follows."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal, normalizing it to the `'<value>' <unit>`
        canonical form where possible; retreats and returns None on failure."""
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            # Guard against misparsing e.g. `interval IS ...` as an interval.
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1].upper())

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, string concat (||), ?? coalescing and
        << / >> shifts over terms."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators over factors."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators, annotating divisions with the
        dialect's typed/safe division semantics."""
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            this = self.expression(
                self.FACTOR[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )
            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        # Prefix unary operators, otherwise a typed/column expression with an
        # optional AT TIME ZONE suffix.
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
            while True:
                index = self._index
                self._match(TokenType.PLUS)

                if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
                    self._retreat(index)
                    break

                interval = self.expression(  # type: ignore
                    exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
                )

            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # <type> <literal> syntax, e.g. DATE '2020-01-01'
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # A bare parameterless type here was likely an identifier; reparse as a column
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        # A single type parameter, e.g. the 10 in DECIMAL(10), optionally followed by a
        # keyword (parsed as a var).
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        # Parses a data type, returning None (with the cursor restored) if the upcoming
        # tokens don't form one.
        index = self._index

        # Teradata-style SYSUDTLIB.<type> prefix
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            # Not a known type token: optionally try a plain identifier, which may
            # re-tokenize into a type, or be a user-defined (possibly dotted) type.
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if identifier:
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        # Parenthesized type arguments, e.g. DECIMAL(10, 2), STRUCT(...), ENUM(...)
        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                # Aggregate types take a function (or identifier) first, then the types
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # TYPE(...) might actually be a function call — resolved below
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        # Angle-bracket nested types, e.g. ARRAY<INT>, STRUCT<a INT>
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values following the type, e.g. ARRAY<INT>[1, 2]
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            # WITH/WITHOUT TIME ZONE modifiers pin down the concrete temporal type
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            # INTERVAL <unit> TO <unit> spans, e.g. INTERVAL DAY TO SECOND
            if self._match_text_seq("TO"):
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))

        if maybe_func and check_func:
            # Disambiguate TYPE(...) from a same-named function call: only a following
            # string literal confirms this was a type (e.g. DATE(...) '...')
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] pairs wrap the type in ARRAY, e.g. INT[][]
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        # A single STRUCT member: [name [:]] type, parsed as a column def.
        index = self._index
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        column_def = self._parse_column_def(this)

        # If no type was attached and one is required, what we parsed must itself be
        # the type — rewind and reparse it as such.
        if type_required and (
            (isinstance(this, exp.Column) and this.this is column_def) or this is column_def
        ):
            self._retreat(index)
            return self._parse_types()

        return column_def

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Optional AT TIME ZONE <zone> suffix.
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_column_reference()
        return self._parse_column_ops(this) if this else self._parse_bracket(this)

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
            # VALUES may be used as a plain identifier when the dialect expects
            # VALUES to be followed by parens and none follow here
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        # Bare identifiers are promoted to Column nodes
        return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Applies postfix column operators (casts, dots, brackets, ...) repeatedly.
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # <expr>::<type> cast
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
4077 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4078 this = self._replace_columns_with_dots(this) 4079 4080 if op: 4081 this = op(self, this, field) 4082 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4083 this = self.expression( 4084 exp.Column, 4085 this=field, 4086 table=this.this, 4087 db=this.args.get("table"), 4088 catalog=this.args.get("db"), 4089 ) 4090 else: 4091 this = self.expression(exp.Dot, this=this, expression=field) 4092 this = self._parse_bracket(this) 4093 return this 4094 4095 def _parse_primary(self) -> t.Optional[exp.Expression]: 4096 if self._match_set(self.PRIMARY_PARSERS): 4097 token_type = self._prev.token_type 4098 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4099 4100 if token_type == TokenType.STRING: 4101 expressions = [primary] 4102 while self._match(TokenType.STRING): 4103 expressions.append(exp.Literal.string(self._prev.text)) 4104 4105 if len(expressions) > 1: 4106 return self.expression(exp.Concat, expressions=expressions) 4107 4108 return primary 4109 4110 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4111 return exp.Literal.number(f"0.{self._prev.text}") 4112 4113 if self._match(TokenType.L_PAREN): 4114 comments = self._prev_comments 4115 query = self._parse_select() 4116 4117 if query: 4118 expressions = [query] 4119 else: 4120 expressions = self._parse_expressions() 4121 4122 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4123 4124 if isinstance(this, exp.UNWRAPPED_QUERIES): 4125 this = self._parse_set_operations( 4126 self._parse_subquery(this=this, parse_alias=False) 4127 ) 4128 elif isinstance(this, exp.Subquery): 4129 this = self._parse_subquery( 4130 this=self._parse_set_operations(this), parse_alias=False 4131 ) 4132 elif len(expressions) > 1: 4133 this = self.expression(exp.Tuple, expressions=expressions) 4134 else: 4135 this = self.expression(exp.Paren, this=this) 4136 4137 if this: 4138 
                # Preserve comments that preceded the opening paren
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        # A "field" is the first of: literal/primary, function call, or identifier.
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        # Functions that are parsed without parentheses, via a dedicated parser
        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                # Zero-arg functions usable without parens, e.g. CURRENT_DATE
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            # e.g. EXISTS(SELECT ...) / EXISTS(WITH ...)
            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                # Normalize aliased args (k AS v / k = v) into PropertyEQ nodes
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                # Builders declaring a "dialect" parameter receive it explicitly
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Remember the original spelling so it can be reproduced on output
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        # Rewrites key/value style arguments (Alias, Eq, ...) into PropertyEQ nodes.
        transformed = []

        for e in expressions:
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ,
                        this=exp.to_identifier(e.name), expression=e.expression
                    )

                # Unwrap a Column key down to its identifier
                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        # A UDF parameter: identifier with an optional type, as a column def.
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        # Parses a possibly-dotted UDF name and, if present, its parameter list.
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        # e.g. MySQL charset introducers: _utf8'abc'; falls back to a plain identifier
        # when no literal follows.
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        # [<kind>.]<name>, e.g. @@session.var — the part before the dot becomes "kind".
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        # Tries to parse a lambda, e.g. (x, y) -> x + y; otherwise falls back to a
        # DISTINCT list or a regular select/expression argument.
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda: rewind and parse as an ordinary argument
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        # Aggregate-argument modifiers like IGNORE NULLS, HAVING MAX, ORDER BY, LIMIT
        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index

        # A nested SELECT means this is not a schema definition — bail out early.
        # Only attempted when there are no pre-existing errors, and any errors raised
        # by the speculative parse are discarded.
        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            # Untyped computed column: <name> AS <expr> [PERSISTED] [NOT NULL]
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            # Typed transform column: <name> <type> AS (<expr>)
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        # With neither a type nor constraints this was just an identifier
        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        # AUTO_INCREMENT, optionally with (start, increment) or START ... INCREMENT ...
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        # AUTO REFRESH <value>; retreats past the already-consumed AUTO otherwise.
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        # COMPRESS [(<expr>, ...)] or COMPRESS <expr>
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        # GENERATED {BY DEFAULT | ALWAYS} AS {IDENTITY | ROW | (<expr>)} [...]
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            # GENERATED ... AS ROW {START | END} [HIDDEN]
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            # Sequence options inside the parens
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expr>) — a computed expression, not an identity
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare (start[, increment]) shorthand
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        # INLINE [LENGTH] <expr>
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        # NOT {NULL | CASESPECIFIC | FOR REPLICATION}; None when NOT starts something else.
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        # [CONSTRAINT <name>] <kind ...>
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        # Named table constraint, or an unnamed schema-level constraint.
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        # Collects consecutive constraints (or function-style checks) until none match.
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        # A quoted identifier can never start a constraint keyword
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        # UNIQUE [KEY] [(<columns>)] [USING <index type>] [ON CONFLICT ...]
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        # Collects trailing key-constraint options as plain strings, e.g.
        # "ON DELETE CASCADE", "DEFERRABLE", "MATCH FULL".
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The word after ON (e.g. DELETE/UPDATE) is kept verbatim
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        # REFERENCES <table> [<options>]; when match=False the keyword was already consumed.
        if match and not self._match(TokenType.REFERENCES):
            return None

        # NOTE(review): expressions is always None here — the referenced column list,
        # if any, ends up parsed as part of the table schema; confirm against callers.
        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        # FOREIGN KEY (<columns>) [REFERENCES ...] [ON {DELETE | UPDATE} <action>]...
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            # Stored lowercased so it maps onto the ForeignKey arg names
            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single-word action is taken verbatim
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        # A single element of a PRIMARY KEY column list.
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        # PERIOD FOR SYSTEM_TIME (<start>, <end>); retreats if SYSTEM_TIME doesn't follow.
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        # Without a column list this is a column-level constraint
        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        # One element inside [...] or {...}: expression with optional alias and slice.
        # is_map is unused here; presumably dialect subclasses override — confirm.
        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Postfix [...] subscripts / array literals, and {...} struct literals.
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # A subscript: normalize the index by the dialect's INDEX_OFFSET
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        # Chained subscripts, e.g. x[0][1]
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # <this>:<expr> slice, used inside brackets.
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        # CASE [<operand>] WHEN ... THEN ... [ELSE ...] END
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            # ELSE interval END was mis-parsed as an Interval with unit END — the
            # default is really a column named "interval"
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        # IF(<cond>, <true>[, <false>]) or IF <cond> THEN <true> [ELSE <false>] END
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            # A statement-leading IF without parens is treated as an opaque command
            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        # NEXT VALUE FOR <sequence> [OVER (<order>)]; retreats past NEXT otherwise.
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        # EXTRACT(<part> FROM <expr>) or EXTRACT(<part>, <expr>)
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this,
expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the interior of CAST(<expr> AS <type> [FORMAT <fmt>]).

        `strict` selects exp.Cast vs exp.TryCast; `safe` is forwarded to the node.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Two-argument form CAST(x, 'type string').
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                # CAST(string AS temporal FORMAT fmt) is rewritten to STR_TO_DATE /
                # STR_TO_TIME using the dialect's time-format mappings.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unrecognized type name: treat it as a user-defined type.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style arguments into exp.GroupConcat."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        # CONVERT(expr USING charset) or CONVERT(expr, type); both map to a cast.
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # NULL-safe comparison: equal, or both operands are NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        # An odd number of search/result args means the trailing one is the default.
        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        # [KEY] <key> <sep> [VALUE] <value>, as used in JSON_OBJECT arguments.
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Wraps `this` in exp.FormatJson when followed by FORMAT JSON.
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, 
this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        """Parse JSON_OBJECT(...) / JSON_OBJECTAGG(...) arguments and clauses."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        # WITH/WITHOUT UNIQUE [KEYS]
        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        # COLUMNS (<column def>, ...)
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        # JSON_TABLE(doc [, path] [ERROR|NULL ON ERROR] [ERROR|NULL ON EMPTY] COLUMNS ...)
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        # MySQL full-text search: MATCH (col, ...) AGAINST ('expr' [modifier])
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # <name> <type> [<path>] [AS JSON]
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        # POSITION(needle IN haystack) or comma-separated INSTR/LOCATE-style arguments.
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        # PREDICT(MODEL <model>, TABLE <table> [, <params struct>])
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            # LEADING / TRAILING / BOTH
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM string) vs TRIM(string, chars): operand order differs.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        # WINDOW <name> AS (...), ... at the end of a SELECT.
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # HAVING MAX <col> / HAVING MIN <col> (e.g. Snowflake's MAX_BY-style syntax).
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the trailing clauses of a function call: FILTER, WITHIN GROUP,
        IGNORE/RESPECT NULLS and OVER (...); with `alias=True`, parse a named
        window definition instead (WINDOW x AS (...))."""
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Hoist the IGNORE/RESPECT NULLS wrapper above the aggregate call.
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> (reference to a named window).
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        # One side of a frame spec: UNBOUNDED / CURRENT ROW / <expr>, plus PRECEDING/FOLLOWING.
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an [AS] alias for `this`; with `explicit=True`, require the AS keyword."""
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            # Multi-alias form: expr AS (a, b, ...)
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.comments
                column.comments = None

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        identifier = self._parse_identifier()

        if identifier:
            return 
identifier

        # Fall back to any non-reserved token (or the provided token set) as an identifier.
        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        # A quoted identifier token, e.g. "foo" / `foo` depending on the dialect.
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        # Consume and return the current token unless it's reserved.
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        # {name} or {name:part} style parameters.
        self._match(TokenType.L_BRACE)
        this = self._parse_identifier() or self._parse_primary_or_var()
        expression = self._match(TokenType.COLON) and (
            self._parse_identifier() or self._parse_primary_or_var()
        )
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The parser declined: give the token back.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        # SELECT * EXCEPT (col, ...) — also accepts a single unparenthesized column.
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        # SELECT * REPLACE (expr AS col, ...) — also accepts a single unparenthesized expr.
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list using `parse_method`, dropping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach comments that followed the separator to the preceding item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        # Left-associative fold over binary operators given by the `expressions` map.
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        # Run `parse_method` inside parentheses; `optional=True` allows them to be absent.
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [kind] TRANSACTION|WORK [mode, ...]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        # Each mode is a run of VAR tokens joined by spaces; modes are comma-separated.
        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            # AND [NO] CHAIN — only carried on the Commit node below.
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        # ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST | AFTER <col>]
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            # Default the drop kind to COLUMN when _parse_drop didn't set one.
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            # Single ADD followed by a (possibly wrapped) list of column defs.
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        # ALTER [COLUMN] <col> {DROP DEFAULT | SET DEFAULT <expr> | COMMENT <str> |
        #                       [SET DATA] TYPE <type> [COLLATE ...] [USING ...]}
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse ALTER TABLE; falls back to a raw exp.Command on unsupported syntax."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            options = self._parse_csv(self._parse_property)

            # Only build an AlterTable if all input was consumed; otherwise fall back.
            if not self._curr and actions:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        # MERGE INTO <target> USING <source> ON <cond> WHEN ... THEN ...
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse the WHEN [NOT] MATCHED [BY TARGET|SOURCE] [AND cond] THEN ... clauses of MERGE."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT *
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE *
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        # <name> = <value> or <name> TO <value> inside a SET statement.
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            # Leftover tokens: the SET syntax wasn't fully understood, keep it verbatim.
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        """Match a (possibly multi-keyword) option from `options` and return it as exp.Var."""
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            # No continuation matched; an empty continuation list means the bare
            # option is valid on its own.
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        # Consume everything to the end and wrap the raw SQL in an exp.Command.
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a ClickHouse-style dictionary property: <this>(<kind>[(<key> <value> ...)])."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
if has_min: 5853 min = self._parse_var() or self._parse_primary() 5854 self._match_text_seq("MAX") 5855 max = self._parse_var() or self._parse_primary() 5856 else: 5857 max = self._parse_var() or self._parse_primary() 5858 min = exp.Literal.number(0) 5859 self._match_r_paren() 5860 return self.expression(exp.DictRange, this=this, min=min, max=max) 5861 5862 def _parse_comprehension( 5863 self, this: t.Optional[exp.Expression] 5864 ) -> t.Optional[exp.Comprehension]: 5865 index = self._index 5866 expression = self._parse_column() 5867 if not self._match(TokenType.IN): 5868 self._retreat(index - 1) 5869 return None 5870 iterator = self._parse_column() 5871 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5872 return self.expression( 5873 exp.Comprehension, 5874 this=this, 5875 expression=expression, 5876 iterator=iterator, 5877 condition=condition, 5878 ) 5879 5880 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 5881 if self._match(TokenType.HEREDOC_STRING): 5882 return self.expression(exp.Heredoc, this=self._prev.text) 5883 5884 if not self._match_text_seq("$"): 5885 return None 5886 5887 tags = ["$"] 5888 tag_text = None 5889 5890 if self._is_connected(): 5891 self._advance() 5892 tags.append(self._prev.text.upper()) 5893 else: 5894 self.raise_error("No closing $ found") 5895 5896 if tags[-1] != "$": 5897 if self._is_connected() and self._match_text_seq("$"): 5898 tag_text = tags[-1] 5899 tags.append("$") 5900 else: 5901 self.raise_error("No closing $ found") 5902 5903 heredoc_start = self._curr 5904 5905 while self._curr: 5906 if self._match_text_seq(*tags, advance=False): 5907 this = self._find_sql(heredoc_start, self._prev) 5908 self._advance(len(tags)) 5909 return self.expression(exp.Heredoc, this=this, tag=tag_text) 5910 5911 self._advance() 5912 5913 self.raise_error(f"No closing {''.join(tags)} found") 5914 return None 5915 5916 def _find_parser( 5917 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5918 ) -> 
t.Optional[t.Callable]: 5919 if not self._curr: 5920 return None 5921 5922 index = self._index 5923 this = [] 5924 while True: 5925 # The current token might be multiple words 5926 curr = self._curr.text.upper() 5927 key = curr.split(" ") 5928 this.append(curr) 5929 5930 self._advance() 5931 result, trie = in_trie(trie, key) 5932 if result == TrieResult.FAILED: 5933 break 5934 5935 if result == TrieResult.EXISTS: 5936 subparser = parsers[" ".join(this)] 5937 return subparser 5938 5939 self._retreat(index) 5940 return None 5941 5942 def _match(self, token_type, advance=True, expression=None): 5943 if not self._curr: 5944 return None 5945 5946 if self._curr.token_type == token_type: 5947 if advance: 5948 self._advance() 5949 self._add_comments(expression) 5950 return True 5951 5952 return None 5953 5954 def _match_set(self, types, advance=True): 5955 if not self._curr: 5956 return None 5957 5958 if self._curr.token_type in types: 5959 if advance: 5960 self._advance() 5961 return True 5962 5963 return None 5964 5965 def _match_pair(self, token_type_a, token_type_b, advance=True): 5966 if not self._curr or not self._next: 5967 return None 5968 5969 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5970 if advance: 5971 self._advance(2) 5972 return True 5973 5974 return None 5975 5976 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5977 if not self._match(TokenType.L_PAREN, expression=expression): 5978 self.raise_error("Expecting (") 5979 5980 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5981 if not self._match(TokenType.R_PAREN, expression=expression): 5982 self.raise_error("Expecting )") 5983 5984 def _match_texts(self, texts, advance=True): 5985 if self._curr and self._curr.text.upper() in texts: 5986 if advance: 5987 self._advance() 5988 return True 5989 return None 5990 5991 def _match_text_seq(self, *texts, advance=True): 5992 index = self._index 5993 for text in 
texts: 5994 if self._curr and self._curr.text.upper() == text: 5995 self._advance() 5996 else: 5997 self._retreat(index) 5998 return None 5999 6000 if not advance: 6001 self._retreat(index) 6002 6003 return True 6004 6005 @t.overload 6006 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: ... 6007 6008 @t.overload 6009 def _replace_columns_with_dots( 6010 self, this: t.Optional[exp.Expression] 6011 ) -> t.Optional[exp.Expression]: ... 6012 6013 def _replace_columns_with_dots(self, this): 6014 if isinstance(this, exp.Dot): 6015 exp.replace_children(this, self._replace_columns_with_dots) 6016 elif isinstance(this, exp.Column): 6017 exp.replace_children(this, self._replace_columns_with_dots) 6018 table = this.args.get("table") 6019 this = ( 6020 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 6021 ) 6022 6023 return this 6024 6025 def _replace_lambda( 6026 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6027 ) -> t.Optional[exp.Expression]: 6028 if not node: 6029 return node 6030 6031 for column in node.find_all(exp.Column): 6032 if column.parts[0].name in lambda_variables: 6033 dot_or_id = column.to_dot() if column.table else column.this 6034 parent = column.parent 6035 6036 while isinstance(parent, exp.Dot): 6037 if not isinstance(parent.parent, exp.Dot): 6038 parent.replace(dot_or_id) 6039 break 6040 parent = parent.parent 6041 else: 6042 if column is node: 6043 node = dot_or_id 6044 else: 6045 column.replace(dot_or_id) 6046 return node 6047 6048 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6049 start = self._prev 6050 6051 # Not to be confused with TRUNCATE(number, decimals) function call 6052 if self._match(TokenType.L_PAREN): 6053 self._retreat(self._index - 2) 6054 return self._parse_function() 6055 6056 # Clickhouse supports TRUNCATE DATABASE as well 6057 is_database = self._match(TokenType.DATABASE) 6058 6059 self._match(TokenType.TABLE) 
6060 6061 exists = self._parse_exists(not_=False) 6062 6063 expressions = self._parse_csv( 6064 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6065 ) 6066 6067 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6068 6069 if self._match_text_seq("RESTART", "IDENTITY"): 6070 identity = "RESTART" 6071 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6072 identity = "CONTINUE" 6073 else: 6074 identity = None 6075 6076 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6077 option = self._prev.text 6078 else: 6079 option = None 6080 6081 partition = self._parse_partition() 6082 6083 # Fallback case 6084 if self._curr: 6085 return self._parse_as_command(start) 6086 6087 return self.expression( 6088 exp.TruncateTable, 6089 expressions=expressions, 6090 is_database=is_database, 6091 exists=exists, 6092 cluster=cluster, 6093 identity=identity, 6094 option=option, 6095 partition=partition, 6096 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- dialect: The SQL dialect to parse for; resolved via Dialect.get_or_raise. Default: None
1090 def __init__( 1091 self, 1092 error_level: t.Optional[ErrorLevel] = None, 1093 error_message_context: int = 100, 1094 max_errors: int = 3, 1095 dialect: DialectType = None, 1096 ): 1097 from sqlglot.dialects import Dialect 1098 1099 self.error_level = error_level or ErrorLevel.IMMEDIATE 1100 self.error_message_context = error_message_context 1101 self.max_errors = max_errors 1102 self.dialect = Dialect.get_or_raise(dialect) 1103 self.reset()
1115 def parse( 1116 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1117 ) -> t.List[t.Optional[exp.Expression]]: 1118 """ 1119 Parses a list of tokens and returns a list of syntax trees, one tree 1120 per parsed SQL statement. 1121 1122 Args: 1123 raw_tokens: The list of tokens. 1124 sql: The original SQL string, used to produce helpful debug messages. 1125 1126 Returns: 1127 The list of the produced syntax trees. 1128 """ 1129 return self._parse( 1130 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1131 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1133 def parse_into( 1134 self, 1135 expression_types: exp.IntoType, 1136 raw_tokens: t.List[Token], 1137 sql: t.Optional[str] = None, 1138 ) -> t.List[t.Optional[exp.Expression]]: 1139 """ 1140 Parses a list of tokens into a given Expression type. If a collection of Expression 1141 types is given instead, this method will try to parse the token list into each one 1142 of them, stopping at the first for which the parsing succeeds. 1143 1144 Args: 1145 expression_types: The expression type(s) to try and parse the token list into. 1146 raw_tokens: The list of tokens. 1147 sql: The original SQL string, used to produce helpful debug messages. 1148 1149 Returns: 1150 The target Expression. 1151 """ 1152 errors = [] 1153 for expression_type in ensure_list(expression_types): 1154 parser = self.EXPRESSION_PARSERS.get(expression_type) 1155 if not parser: 1156 raise TypeError(f"No parser registered for {expression_type}") 1157 1158 try: 1159 return self._parse(parser, raw_tokens, sql) 1160 except ParseError as e: 1161 e.errors[0]["into_expression"] = expression_type 1162 errors.append(e) 1163 1164 raise ParseError( 1165 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1166 errors=merge_errors(errors), 1167 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1204 def check_errors(self) -> None: 1205 """Logs or raises any found errors, depending on the chosen error level setting.""" 1206 if self.error_level == ErrorLevel.WARN: 1207 for error in self.errors: 1208 logger.error(str(error)) 1209 elif self.error_level == ErrorLevel.RAISE and self.errors: 1210 raise ParseError( 1211 concat_messages(self.errors, self.max_errors), 1212 errors=merge_errors(self.errors), 1213 )
Logs or raises any found errors, depending on the chosen error level setting.
1215 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1216 """ 1217 Appends an error in the list of recorded errors or raises it, depending on the chosen 1218 error level setting. 1219 """ 1220 token = token or self._curr or self._prev or Token.string("") 1221 start = token.start 1222 end = token.end + 1 1223 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1224 highlight = self.sql[start:end] 1225 end_context = self.sql[end : end + self.error_message_context] 1226 1227 error = ParseError.new( 1228 f"{message}. Line {token.line}, Col: {token.col}.\n" 1229 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1230 description=message, 1231 line=token.line, 1232 col=token.col, 1233 start_context=start_context, 1234 highlight=highlight, 1235 end_context=end_context, 1236 ) 1237 1238 if self.error_level == ErrorLevel.IMMEDIATE: 1239 raise error 1240 1241 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1243 def expression( 1244 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1245 ) -> E: 1246 """ 1247 Creates a new, validated Expression. 1248 1249 Args: 1250 exp_class: The expression class to instantiate. 1251 comments: An optional list of comments to attach to the expression. 1252 kwargs: The arguments to set for the expression along with their respective values. 1253 1254 Returns: 1255 The target expression. 1256 """ 1257 instance = exp_class(**kwargs) 1258 instance.add_comments(comments) if comments else self._add_comments(instance) 1259 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1266 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1267 """ 1268 Validates an Expression, making sure that all its mandatory arguments are set. 1269 1270 Args: 1271 expression: The expression to validate. 1272 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1273 1274 Returns: 1275 The validated expression. 1276 """ 1277 if self.error_level != ErrorLevel.IGNORE: 1278 for error_message in expression.error_messages(args): 1279 self.raise_error(error_message) 1280 1281 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.