sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from typing_extensions import Literal

    from sqlglot._typing import E
    from sqlglot.dialects.dialect import Dialect, DialectType

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


def parse_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
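
# A hedged sketch of how these argument-list helpers behave: each one receives
# the function's already-parsed arguments and builds an AST node. For example,
# parse_var_map pairs up alternating key/value arguments:
#
#     >>> node = parse_var_map([exp.Literal.string("a"), exp.Literal.number(1)])
#     >>> isinstance(node, exp.VarMap)
#     True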

class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "LOG": parse_logarithm,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }
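
    # The FUNCTIONS table maps an upper-case function name to a builder that
    # receives the already-parsed argument list (plus the active dialect, for the
    # dialect-aware entries). A hedged sketch of how a dialect parser extends it;
    # MyParser and "FOO" are hypothetical:
    #
    #     class MyParser(Parser):
    #         FUNCTIONS = {
    #             **Parser.FUNCTIONS,
    #             "FOO": lambda args: exp.Anonymous(this="FOO", expressions=args),
    #         }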

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.MODEL,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.FOREIGN_KEY,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}
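
    # ID_VAR_TOKENS is what lets soft keywords double as identifiers. A hedged
    # doctest: `first` parses as an alias because TokenType.FIRST is in the set.
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT 1 AS first").expressions[0].alias
    #     'first'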

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.COLON_EQ: exp.PropertyEQ,
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }
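
    # The ARROW entry turns `x -> x + 1` (as used by higher-order functions) into
    # an exp.Lambda whose `this` is the body and whose `expressions` are the bound
    # parameters. A hedged doctest, assuming a dialect with arrow lambdas:
    #
    #     >>> import sqlglot
    #     >>> q = sqlglot.parse_one("SELECT LIST_TRANSFORM(l, x -> x + 1)", read="duckdb")
    #     >>> [p.name for p in q.find(exp.Lambda).expressions]
    #     ['x']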

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }
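
    # _parse_statement dispatches on the first token through STATEMENT_PARSERS
    # above, falling back to plain commands and expressions. A hedged doctest:
    #
    #     >>> import sqlglot
    #     >>> type(sqlglot.parse_one("USE ROLE admin")).__name__
    #     'Use'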

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
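
    # binary_range_parser routes infix predicates through _parse_escape, so
    # `a LIKE b ESCAPE 'x'` becomes an exp.Escape wrapping an exp.Like. A hedged
    # doctest:
    #
    #     >>> import sqlglot
    #     >>> type(sqlglot.parse_one("SELECT a LIKE b ESCAPE 'x'").expressions[0]).__name__
    #     'Escape'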
"COPY": lambda self: self._parse_copy_property(), 691 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 692 "DEFINER": lambda self: self._parse_definer(), 693 "DETERMINISTIC": lambda self: self.expression( 694 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 695 ), 696 "DISTKEY": lambda self: self._parse_distkey(), 697 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 698 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 699 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 700 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 701 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 702 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 703 "FREESPACE": lambda self: self._parse_freespace(), 704 "HEAP": lambda self: self.expression(exp.HeapProperty), 705 "IMMUTABLE": lambda self: self.expression( 706 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 707 ), 708 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 709 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 710 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 711 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 712 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 713 "LIKE": lambda self: self._parse_create_like(), 714 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 715 "LOCK": lambda self: self._parse_locking(), 716 "LOCKING": lambda self: self._parse_locking(), 717 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 718 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 719 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 720 "MODIFIES": lambda self: self._parse_modifies_property(), 721 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 722 "NO": lambda self: self._parse_no_property(), 723 "ON": lambda self: self._parse_on_property(), 724 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 725 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 726 "PARTITION": lambda self: self._parse_partitioned_of(), 727 "PARTITION BY": lambda self: self._parse_partitioned_by(), 728 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 729 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 730 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 731 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 732 "READS": lambda self: self._parse_reads_property(), 733 "REMOTE": lambda self: self._parse_remote_with_connection(), 734 "RETURNS": lambda self: self._parse_returns(), 735 "ROW": lambda self: self._parse_row(), 736 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 737 "SAMPLE": lambda self: self.expression( 738 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 739 ), 740 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 741 "SETTINGS": lambda self: self.expression( 742 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 743 ), 744 "SORTKEY": lambda self: self._parse_sortkey(), 745 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 746 "STABLE": lambda self: self.expression( 747 exp.StabilityProperty, 

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }
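
    # Column constraints are dispatched by keyword through this table. A hedged
    # doctest: NOT NULL and DEFAULT each contribute one constraint node.
    #
    #     >>> import sqlglot
    #     >>> col = sqlglot.parse_one("CREATE TABLE t (x INT NOT NULL DEFAULT 0)").find(exp.ColumnDef)
    #     >>> len(col.args["constraints"])
    #     2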

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"}

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }
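
    # Each modifier parser returns an (arg_name, node) pair that gets attached to
    # the enclosing statement's args. A hedged doctest:
    #
    #     >>> import sqlglot
    #     >>> q = sqlglot.parse_one("SELECT a FROM t WHERE a > 0 LIMIT 5")
    #     >>> sorted(k for k in ("where", "limit") if q.args.get(k))
    #     ['limit', 'where']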
self: ("connect", self._parse_connect(skip_start_token=True)), 892 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 893 } 894 895 SET_PARSERS = { 896 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 897 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 898 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 899 "TRANSACTION": lambda self: self._parse_set_transaction(), 900 } 901 902 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 903 904 TYPE_LITERAL_PARSERS = { 905 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 906 } 907 908 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 909 910 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 911 912 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 913 914 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 915 TRANSACTION_CHARACTERISTICS = { 916 "ISOLATION LEVEL REPEATABLE READ", 917 "ISOLATION LEVEL READ COMMITTED", 918 "ISOLATION LEVEL READ UNCOMMITTED", 919 "ISOLATION LEVEL SERIALIZABLE", 920 "READ WRITE", 921 "READ ONLY", 922 } 923 924 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 925 926 CLONE_KEYWORDS = {"CLONE", "COPY"} 927 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 928 929 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 930 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 931 932 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 933 934 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 935 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 936 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 937 938 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 939 940 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 941 942 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 943 944 DISTINCT_TOKENS = {TokenType.DISTINCT} 945 946 NULL_TOKENS = {TokenType.NULL} 947 948 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 949 950 STRICT_CAST = True 951 952 PREFIXED_PIVOT_COLUMNS = False 953 IDENTIFY_PIVOT_STRINGS = False 954 955 LOG_DEFAULTS_TO_LN = False 956 957 # Whether or not ADD is present for each column added by ALTER TABLE 958 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 959 960 # Whether or not the table sample clause expects CSV syntax 961 TABLESAMPLE_CSV = False 962 963 # Whether or not the SET command needs a delimiter (e.g. 
"=") for assignments 964 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 965 966 # Whether the TRIM function expects the characters to trim as its first argument 967 TRIM_PATTERN_FIRST = False 968 969 # Whether or not string aliases are supported `SELECT COUNT(*) 'count'` 970 STRING_ALIASES = False 971 972 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 973 MODIFIERS_ATTACHED_TO_UNION = True 974 UNION_MODIFIERS = {"order", "limit", "offset"} 975 976 __slots__ = ( 977 "error_level", 978 "error_message_context", 979 "max_errors", 980 "dialect", 981 "sql", 982 "errors", 983 "_tokens", 984 "_index", 985 "_curr", 986 "_next", 987 "_prev", 988 "_prev_comments", 989 ) 990 991 # Autofilled 992 SHOW_TRIE: t.Dict = {} 993 SET_TRIE: t.Dict = {} 994 995 def __init__( 996 self, 997 error_level: t.Optional[ErrorLevel] = None, 998 error_message_context: int = 100, 999 max_errors: int = 3, 1000 dialect: DialectType = None, 1001 ): 1002 from sqlglot.dialects import Dialect 1003 1004 self.error_level = error_level or ErrorLevel.IMMEDIATE 1005 self.error_message_context = error_message_context 1006 self.max_errors = max_errors 1007 self.dialect = Dialect.get_or_raise(dialect) 1008 self.reset() 1009 1010 def reset(self): 1011 self.sql = "" 1012 self.errors = [] 1013 self._tokens = [] 1014 self._index = 0 1015 self._curr = None 1016 self._next = None 1017 self._prev = None 1018 self._prev_comments = None 1019 1020 def parse( 1021 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1022 ) -> t.List[t.Optional[exp.Expression]]: 1023 """ 1024 Parses a list of tokens and returns a list of syntax trees, one tree 1025 per parsed SQL statement. 1026 1027 Args: 1028 raw_tokens: The list of tokens. 1029 sql: The original SQL string, used to produce helpful debug messages. 1030 1031 Returns: 1032 The list of the produced syntax trees. 1033 """ 1034 return self._parse( 1035 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1036 ) 1037 1038 def parse_into( 1039 self, 1040 expression_types: exp.IntoType, 1041 raw_tokens: t.List[Token], 1042 sql: t.Optional[str] = None, 1043 ) -> t.List[t.Optional[exp.Expression]]: 1044 """ 1045 Parses a list of tokens into a given Expression type. If a collection of Expression 1046 types is given instead, this method will try to parse the token list into each one 1047 of them, stopping at the first for which the parsing succeeds. 1048 1049 Args: 1050 expression_types: The expression type(s) to try and parse the token list into. 1051 raw_tokens: The list of tokens. 1052 sql: The original SQL string, used to produce helpful debug messages. 1053 1054 Returns: 1055 The target Expression. 
1056 """ 1057 errors = [] 1058 for expression_type in ensure_list(expression_types): 1059 parser = self.EXPRESSION_PARSERS.get(expression_type) 1060 if not parser: 1061 raise TypeError(f"No parser registered for {expression_type}") 1062 1063 try: 1064 return self._parse(parser, raw_tokens, sql) 1065 except ParseError as e: 1066 e.errors[0]["into_expression"] = expression_type 1067 errors.append(e) 1068 1069 raise ParseError( 1070 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1071 errors=merge_errors(errors), 1072 ) from errors[-1] 1073 1074 def _parse( 1075 self, 1076 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1077 raw_tokens: t.List[Token], 1078 sql: t.Optional[str] = None, 1079 ) -> t.List[t.Optional[exp.Expression]]: 1080 self.reset() 1081 self.sql = sql or "" 1082 1083 total = len(raw_tokens) 1084 chunks: t.List[t.List[Token]] = [[]] 1085 1086 for i, token in enumerate(raw_tokens): 1087 if token.token_type == TokenType.SEMICOLON: 1088 if i < total - 1: 1089 chunks.append([]) 1090 else: 1091 chunks[-1].append(token) 1092 1093 expressions = [] 1094 1095 for tokens in chunks: 1096 self._index = -1 1097 self._tokens = tokens 1098 self._advance() 1099 1100 expressions.append(parse_method(self)) 1101 1102 if self._index < len(self._tokens): 1103 self.raise_error("Invalid expression / Unexpected token") 1104 1105 self.check_errors() 1106 1107 return expressions 1108 1109 def check_errors(self) -> None: 1110 """Logs or raises any found errors, depending on the chosen error level setting.""" 1111 if self.error_level == ErrorLevel.WARN: 1112 for error in self.errors: 1113 logger.error(str(error)) 1114 elif self.error_level == ErrorLevel.RAISE and self.errors: 1115 raise ParseError( 1116 concat_messages(self.errors, self.max_errors), 1117 errors=merge_errors(self.errors), 1118 ) 1119 1120 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1121 """ 1122 Appends an error in the list of recorded errors or raises it, depending on the chosen 1123 error level setting. 1124 """ 1125 token = token or self._curr or self._prev or Token.string("") 1126 start = token.start 1127 end = token.end + 1 1128 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1129 highlight = self.sql[start:end] 1130 end_context = self.sql[end : end + self.error_message_context] 1131 1132 error = ParseError.new( 1133 f"{message}. Line {token.line}, Col: {token.col}.\n" 1134 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1135 description=message, 1136 line=token.line, 1137 col=token.col, 1138 start_context=start_context, 1139 highlight=highlight, 1140 end_context=end_context, 1141 ) 1142 1143 if self.error_level == ErrorLevel.IMMEDIATE: 1144 raise error 1145 1146 self.errors.append(error) 1147 1148 def expression( 1149 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1150 ) -> E: 1151 """ 1152 Creates a new, validated Expression. 1153 1154 Args: 1155 exp_class: The expression class to instantiate. 1156 comments: An optional list of comments to attach to the expression. 1157 kwargs: The arguments to set for the expression along with their respective values. 1158 1159 Returns: 1160 The target expression. 
1161 """ 1162 instance = exp_class(**kwargs) 1163 instance.add_comments(comments) if comments else self._add_comments(instance) 1164 return self.validate_expression(instance) 1165 1166 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1167 if expression and self._prev_comments: 1168 expression.add_comments(self._prev_comments) 1169 self._prev_comments = None 1170 1171 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1172 """ 1173 Validates an Expression, making sure that all its mandatory arguments are set. 1174 1175 Args: 1176 expression: The expression to validate. 1177 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1178 1179 Returns: 1180 The validated expression. 1181 """ 1182 if self.error_level != ErrorLevel.IGNORE: 1183 for error_message in expression.error_messages(args): 1184 self.raise_error(error_message) 1185 1186 return expression 1187 1188 def _find_sql(self, start: Token, end: Token) -> str: 1189 return self.sql[start.start : end.end + 1] 1190 1191 def _is_connected(self) -> bool: 1192 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1193 1194 def _advance(self, times: int = 1) -> None: 1195 self._index += times 1196 self._curr = seq_get(self._tokens, self._index) 1197 self._next = seq_get(self._tokens, self._index + 1) 1198 1199 if self._index > 0: 1200 self._prev = self._tokens[self._index - 1] 1201 self._prev_comments = self._prev.comments 1202 else: 1203 self._prev = None 1204 self._prev_comments = None 1205 1206 def _retreat(self, index: int) -> None: 1207 if index != self._index: 1208 self._advance(index - self._index) 1209 1210 def _parse_command(self) -> exp.Command: 1211 return self.expression( 1212 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1213 ) 1214 1215 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1216 start = self._prev 1217 exists = self._parse_exists() if allow_exists else None 1218 1219 self._match(TokenType.ON) 1220 1221 kind = self._match_set(self.CREATABLES) and self._prev 1222 if not kind: 1223 return self._parse_as_command(start) 1224 1225 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1226 this = self._parse_user_defined_function(kind=kind.token_type) 1227 elif kind.token_type == TokenType.TABLE: 1228 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1229 elif kind.token_type == TokenType.COLUMN: 1230 this = self._parse_column() 1231 else: 1232 this = self._parse_id_var() 1233 1234 self._match(TokenType.IS) 1235 1236 return self.expression( 1237 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1238 ) 1239 1240 def _parse_to_table( 1241 self, 1242 ) -> exp.ToTableProperty: 1243 table = self._parse_table_parts(schema=True) 1244 return self.expression(exp.ToTableProperty, this=table) 1245 1246 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1247 def _parse_ttl(self) -> exp.Expression: 1248 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1249 this = self._parse_bitwise() 1250 1251 if self._match_text_seq("DELETE"): 1252 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1253 if self._match_text_seq("RECOMPRESS"): 1254 return self.expression( 1255 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1256 ) 1257 if self._match_text_seq("TO", "DISK"): 1258 return self.expression( 1259 

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
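
    # _parse_exists consumes IF [NOT] EXISTS and reports it as a boolean flag on
    # the resulting node. A hedged doctest:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("DROP TABLE IF EXISTS t").args["exists"]
    #     True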

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )
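
    # A hedged doctest for the happy path above:
    #
    #     >>> import sqlglot
    #     >>> create = sqlglot.parse_one("CREATE OR REPLACE VIEW v AS SELECT 1")
    #     >>> create.args["kind"], create.args["replace"]
    #     ('VIEW', True)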
"MINIMUM")), 1473 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1474 } 1475 1476 if self._match_texts(self.PROPERTY_PARSERS): 1477 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1478 try: 1479 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1480 except TypeError: 1481 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1482 1483 return None 1484 1485 def _parse_property(self) -> t.Optional[exp.Expression]: 1486 if self._match_texts(self.PROPERTY_PARSERS): 1487 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1488 1489 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1490 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1491 1492 if self._match_text_seq("COMPOUND", "SORTKEY"): 1493 return self._parse_sortkey(compound=True) 1494 1495 if self._match_text_seq("SQL", "SECURITY"): 1496 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1497 1498 index = self._index 1499 key = self._parse_column() 1500 1501 if not self._match(TokenType.EQ): 1502 self._retreat(index) 1503 return None 1504 1505 return self.expression( 1506 exp.Property, 1507 this=key.to_dot() if isinstance(key, exp.Column) else key, 1508 value=self._parse_column() or self._parse_var(any_token=True), 1509 ) 1510 1511 def _parse_stored(self) -> exp.FileFormatProperty: 1512 self._match(TokenType.ALIAS) 1513 1514 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1515 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1516 1517 return self.expression( 1518 exp.FileFormatProperty, 1519 this=self.expression( 1520 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1521 ) 1522 if input_format or output_format 1523 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1524 ) 1525 1526 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1527 self._match(TokenType.EQ) 1528 self._match(TokenType.ALIAS) 1529 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1530 1531 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1532 properties = [] 1533 while True: 1534 if before: 1535 prop = self._parse_property_before() 1536 else: 1537 prop = self._parse_property() 1538 1539 if not prop: 1540 break 1541 for p in ensure_list(prop): 1542 properties.append(p) 1543 1544 if properties: 1545 return self.expression(exp.Properties, expressions=properties) 1546 1547 return None 1548 1549 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1550 return self.expression( 1551 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1552 ) 1553 1554 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1555 if self._index >= 2: 1556 pre_volatile_token = self._tokens[self._index - 2] 1557 else: 1558 pre_volatile_token = None 1559 1560 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1561 return exp.VolatileProperty() 1562 1563 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1564 1565 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1566 self._match_pair(TokenType.EQ, TokenType.ON) 1567 1568 prop = self.expression(exp.WithSystemVersioningProperty) 1569 if self._match(TokenType.L_PAREN): 1570 self._match_text_seq("HISTORY_TABLE", "=") 1571 
prop.set("this", self._parse_table_parts()) 1572 1573 if self._match(TokenType.COMMA): 1574 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1575 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1576 1577 self._match_r_paren() 1578 1579 return prop 1580 1581 def _parse_with_property( 1582 self, 1583 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1584 if self._match(TokenType.L_PAREN, advance=False): 1585 return self._parse_wrapped_csv(self._parse_property) 1586 1587 if self._match_text_seq("JOURNAL"): 1588 return self._parse_withjournaltable() 1589 1590 if self._match_text_seq("DATA"): 1591 return self._parse_withdata(no=False) 1592 elif self._match_text_seq("NO", "DATA"): 1593 return self._parse_withdata(no=True) 1594 1595 if not self._next: 1596 return None 1597 1598 return self._parse_withisolatedloading() 1599 1600 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1601 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1602 self._match(TokenType.EQ) 1603 1604 user = self._parse_id_var() 1605 self._match(TokenType.PARAMETER) 1606 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1607 1608 if not user or not host: 1609 return None 1610 1611 return exp.DefinerProperty(this=f"{user}@{host}") 1612 1613 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1614 self._match(TokenType.TABLE) 1615 self._match(TokenType.EQ) 1616 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1617 1618 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1619 return self.expression(exp.LogProperty, no=no) 1620 1621 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1622 return self.expression(exp.JournalProperty, **kwargs) 1623 1624 def _parse_checksum(self) -> exp.ChecksumProperty: 1625 self._match(TokenType.EQ) 1626 1627 on = None 1628 if self._match(TokenType.ON): 1629 on = True 1630 elif self._match_text_seq("OFF"): 1631 on = False 1632 1633 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1634 1635 def _parse_cluster(self) -> exp.Cluster: 1636 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1637 1638 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1639 self._match_text_seq("BY") 1640 1641 self._match_l_paren() 1642 expressions = self._parse_csv(self._parse_column) 1643 self._match_r_paren() 1644 1645 if self._match_text_seq("SORTED", "BY"): 1646 self._match_l_paren() 1647 sorted_by = self._parse_csv(self._parse_ordered) 1648 self._match_r_paren() 1649 else: 1650 sorted_by = None 1651 1652 self._match(TokenType.INTO) 1653 buckets = self._parse_number() 1654 self._match_text_seq("BUCKETS") 1655 1656 return self.expression( 1657 exp.ClusteredByProperty, 1658 expressions=expressions, 1659 sorted_by=sorted_by, 1660 buckets=buckets, 1661 ) 1662 1663 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1664 if not self._match_text_seq("GRANTS"): 1665 self._retreat(self._index - 1) 1666 return None 1667 1668 return self.expression(exp.CopyGrantsProperty) 1669 1670 def _parse_freespace(self) -> exp.FreespaceProperty: 1671 self._match(TokenType.EQ) 1672 return self.expression( 1673 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1674 ) 1675 1676 def _parse_mergeblockratio( 1677 self, no: bool = False, default: bool = False 1678 ) -> exp.MergeBlockRatioProperty: 1679 if self._match(TokenType.EQ): 1680 return self.expression( 
1681 exp.MergeBlockRatioProperty, 1682 this=self._parse_number(), 1683 percent=self._match(TokenType.PERCENT), 1684 ) 1685 1686 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1687 1688 def _parse_datablocksize( 1689 self, 1690 default: t.Optional[bool] = None, 1691 minimum: t.Optional[bool] = None, 1692 maximum: t.Optional[bool] = None, 1693 ) -> exp.DataBlocksizeProperty: 1694 self._match(TokenType.EQ) 1695 size = self._parse_number() 1696 1697 units = None 1698 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1699 units = self._prev.text 1700 1701 return self.expression( 1702 exp.DataBlocksizeProperty, 1703 size=size, 1704 units=units, 1705 default=default, 1706 minimum=minimum, 1707 maximum=maximum, 1708 ) 1709 1710 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1711 self._match(TokenType.EQ) 1712 always = self._match_text_seq("ALWAYS") 1713 manual = self._match_text_seq("MANUAL") 1714 never = self._match_text_seq("NEVER") 1715 default = self._match_text_seq("DEFAULT") 1716 1717 autotemp = None 1718 if self._match_text_seq("AUTOTEMP"): 1719 autotemp = self._parse_schema() 1720 1721 return self.expression( 1722 exp.BlockCompressionProperty, 1723 always=always, 1724 manual=manual, 1725 never=never, 1726 default=default, 1727 autotemp=autotemp, 1728 ) 1729 1730 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1731 no = self._match_text_seq("NO") 1732 concurrent = self._match_text_seq("CONCURRENT") 1733 self._match_text_seq("ISOLATED", "LOADING") 1734 for_all = self._match_text_seq("FOR", "ALL") 1735 for_insert = self._match_text_seq("FOR", "INSERT") 1736 for_none = self._match_text_seq("FOR", "NONE") 1737 return self.expression( 1738 exp.IsolatedLoadingProperty, 1739 no=no, 1740 concurrent=concurrent, 1741 for_all=for_all, 1742 for_insert=for_insert, 1743 for_none=for_none, 1744 ) 1745 1746 def _parse_locking(self) -> exp.LockingProperty: 1747 if self._match(TokenType.TABLE): 1748 kind = "TABLE" 1749 elif self._match(TokenType.VIEW): 1750 kind = "VIEW" 1751 elif self._match(TokenType.ROW): 1752 kind = "ROW" 1753 elif self._match_text_seq("DATABASE"): 1754 kind = "DATABASE" 1755 else: 1756 kind = None 1757 1758 if kind in ("DATABASE", "TABLE", "VIEW"): 1759 this = self._parse_table_parts() 1760 else: 1761 this = None 1762 1763 if self._match(TokenType.FOR): 1764 for_or_in = "FOR" 1765 elif self._match(TokenType.IN): 1766 for_or_in = "IN" 1767 else: 1768 for_or_in = None 1769 1770 if self._match_text_seq("ACCESS"): 1771 lock_type = "ACCESS" 1772 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1773 lock_type = "EXCLUSIVE" 1774 elif self._match_text_seq("SHARE"): 1775 lock_type = "SHARE" 1776 elif self._match_text_seq("READ"): 1777 lock_type = "READ" 1778 elif self._match_text_seq("WRITE"): 1779 lock_type = "WRITE" 1780 elif self._match_text_seq("CHECKSUM"): 1781 lock_type = "CHECKSUM" 1782 else: 1783 lock_type = None 1784 1785 override = self._match_text_seq("OVERRIDE") 1786 1787 return self.expression( 1788 exp.LockingProperty, 1789 this=this, 1790 kind=kind, 1791 for_or_in=for_or_in, 1792 lock_type=lock_type, 1793 override=override, 1794 ) 1795 1796 def _parse_partition_by(self) -> t.List[exp.Expression]: 1797 if self._match(TokenType.PARTITION_BY): 1798 return self._parse_csv(self._parse_conjunction) 1799 return [] 1800 1801 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 1802 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 1803 if self._match_text_seq("MINVALUE"): 1804 return 
exp.var("MINVALUE") 1805 if self._match_text_seq("MAXVALUE"): 1806 return exp.var("MAXVALUE") 1807 return self._parse_bitwise() 1808 1809 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 1810 expression = None 1811 from_expressions = None 1812 to_expressions = None 1813 1814 if self._match(TokenType.IN): 1815 this = self._parse_wrapped_csv(self._parse_bitwise) 1816 elif self._match(TokenType.FROM): 1817 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1818 self._match_text_seq("TO") 1819 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1820 elif self._match_text_seq("WITH", "(", "MODULUS"): 1821 this = self._parse_number() 1822 self._match_text_seq(",", "REMAINDER") 1823 expression = self._parse_number() 1824 self._match_r_paren() 1825 else: 1826 self.raise_error("Failed to parse partition bound spec.") 1827 1828 return self.expression( 1829 exp.PartitionBoundSpec, 1830 this=this, 1831 expression=expression, 1832 from_expressions=from_expressions, 1833 to_expressions=to_expressions, 1834 ) 1835 1836 # https://www.postgresql.org/docs/current/sql-createtable.html 1837 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 1838 if not self._match_text_seq("OF"): 1839 self._retreat(self._index - 1) 1840 return None 1841 1842 this = self._parse_table(schema=True) 1843 1844 if self._match(TokenType.DEFAULT): 1845 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 1846 elif self._match_text_seq("FOR", "VALUES"): 1847 expression = self._parse_partition_bound_spec() 1848 else: 1849 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 1850 1851 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 1852 1853 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1854 self._match(TokenType.EQ) 1855 return self.expression( 1856 exp.PartitionedByProperty, 1857 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1858 ) 1859 1860 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1861 if self._match_text_seq("AND", "STATISTICS"): 1862 statistics = True 1863 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1864 statistics = False 1865 else: 1866 statistics = None 1867 1868 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1869 1870 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1871 if self._match_text_seq("SQL"): 1872 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 1873 return None 1874 1875 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1876 if self._match_text_seq("SQL", "DATA"): 1877 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 1878 return None 1879 1880 def _parse_no_property(self) -> t.Optional[exp.Expression]: 1881 if self._match_text_seq("PRIMARY", "INDEX"): 1882 return exp.NoPrimaryIndexProperty() 1883 if self._match_text_seq("SQL"): 1884 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 1885 return None 1886 1887 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1888 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1889 return exp.OnCommitProperty() 1890 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1891 return exp.OnCommitProperty(delete=True) 1892 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1893 1894 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1895 if 
self._match_text_seq("SQL", "DATA"): 1896 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 1897 return None 1898 1899 def _parse_distkey(self) -> exp.DistKeyProperty: 1900 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1901 1902 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1903 table = self._parse_table(schema=True) 1904 1905 options = [] 1906 while self._match_texts(("INCLUDING", "EXCLUDING")): 1907 this = self._prev.text.upper() 1908 1909 id_var = self._parse_id_var() 1910 if not id_var: 1911 return None 1912 1913 options.append( 1914 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1915 ) 1916 1917 return self.expression(exp.LikeProperty, this=table, expressions=options) 1918 1919 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1920 return self.expression( 1921 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1922 ) 1923 1924 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1925 self._match(TokenType.EQ) 1926 return self.expression( 1927 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1928 ) 1929 1930 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 1931 self._match_text_seq("WITH", "CONNECTION") 1932 return self.expression( 1933 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 1934 ) 1935 1936 def _parse_returns(self) -> exp.ReturnsProperty: 1937 value: t.Optional[exp.Expression] 1938 is_table = self._match(TokenType.TABLE) 1939 1940 if is_table: 1941 if self._match(TokenType.LT): 1942 value = self.expression( 1943 exp.Schema, 1944 this="TABLE", 1945 expressions=self._parse_csv(self._parse_struct_types), 1946 ) 1947 if not self._match(TokenType.GT): 1948 self.raise_error("Expecting >") 1949 else: 1950 value = self._parse_schema(exp.var("TABLE")) 1951 else: 1952 value = self._parse_types() 1953 1954 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1955 1956 def _parse_describe(self) -> exp.Describe: 1957 kind = self._match_set(self.CREATABLES) and self._prev.text 1958 extended = self._match_text_seq("EXTENDED") 1959 this = self._parse_table(schema=True) 1960 properties = self._parse_properties() 1961 expressions = properties.expressions if properties else None 1962 return self.expression( 1963 exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions 1964 ) 1965 1966 def _parse_insert(self) -> exp.Insert: 1967 comments = ensure_list(self._prev_comments) 1968 overwrite = self._match(TokenType.OVERWRITE) 1969 ignore = self._match(TokenType.IGNORE) 1970 local = self._match_text_seq("LOCAL") 1971 alternative = None 1972 1973 if self._match_text_seq("DIRECTORY"): 1974 this: t.Optional[exp.Expression] = self.expression( 1975 exp.Directory, 1976 this=self._parse_var_or_string(), 1977 local=local, 1978 row_format=self._parse_row_format(match_row=True), 1979 ) 1980 else: 1981 if self._match(TokenType.OR): 1982 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1983 1984 self._match(TokenType.INTO) 1985 comments += ensure_list(self._prev_comments) 1986 self._match(TokenType.TABLE) 1987 this = self._parse_table(schema=True) 1988 1989 returning = self._parse_returning() 1990 1991 return self.expression( 1992 exp.Insert, 1993 comments=comments, 1994 this=this, 1995 by_name=self._match_text_seq("BY", "NAME"), 1996 exists=self._parse_exists(), 1997 
partition=self._parse_partition(), 1998 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1999 and self._parse_conjunction(), 2000 expression=self._parse_ddl_select(), 2001 conflict=self._parse_on_conflict(), 2002 returning=returning or self._parse_returning(), 2003 overwrite=overwrite, 2004 alternative=alternative, 2005 ignore=ignore, 2006 ) 2007 2008 def _parse_kill(self) -> exp.Kill: 2009 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2010 2011 return self.expression( 2012 exp.Kill, 2013 this=self._parse_primary(), 2014 kind=kind, 2015 ) 2016 2017 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2018 conflict = self._match_text_seq("ON", "CONFLICT") 2019 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2020 2021 if not conflict and not duplicate: 2022 return None 2023 2024 nothing = None 2025 expressions = None 2026 key = None 2027 constraint = None 2028 2029 if conflict: 2030 if self._match_text_seq("ON", "CONSTRAINT"): 2031 constraint = self._parse_id_var() 2032 else: 2033 key = self._parse_csv(self._parse_value) 2034 2035 self._match_text_seq("DO") 2036 if self._match_text_seq("NOTHING"): 2037 nothing = True 2038 else: 2039 self._match(TokenType.UPDATE) 2040 self._match(TokenType.SET) 2041 expressions = self._parse_csv(self._parse_equality) 2042 2043 return self.expression( 2044 exp.OnConflict, 2045 duplicate=duplicate, 2046 expressions=expressions, 2047 nothing=nothing, 2048 key=key, 2049 constraint=constraint, 2050 ) 2051 2052 def _parse_returning(self) -> t.Optional[exp.Returning]: 2053 if not self._match(TokenType.RETURNING): 2054 return None 2055 return self.expression( 2056 exp.Returning, 2057 expressions=self._parse_csv(self._parse_expression), 2058 into=self._match(TokenType.INTO) and self._parse_table_part(), 2059 ) 2060 2061 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2062 if not self._match(TokenType.FORMAT): 2063 return None 2064 return self._parse_row_format() 2065 2066 def _parse_row_format( 2067 self, match_row: bool = False 2068 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2069 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2070 return None 2071 2072 if self._match_text_seq("SERDE"): 2073 this = self._parse_string() 2074 2075 serde_properties = None 2076 if self._match(TokenType.SERDE_PROPERTIES): 2077 serde_properties = self.expression( 2078 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 2079 ) 2080 2081 return self.expression( 2082 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2083 ) 2084 2085 self._match_text_seq("DELIMITED") 2086 2087 kwargs = {} 2088 2089 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2090 kwargs["fields"] = self._parse_string() 2091 if self._match_text_seq("ESCAPED", "BY"): 2092 kwargs["escaped"] = self._parse_string() 2093 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2094 kwargs["collection_items"] = self._parse_string() 2095 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2096 kwargs["map_keys"] = self._parse_string() 2097 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2098 kwargs["lines"] = self._parse_string() 2099 if self._match_text_seq("NULL", "DEFINED", "AS"): 2100 kwargs["null"] = self._parse_string() 2101 2102 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2103 2104 def _parse_load(self) -> exp.LoadData | exp.Command: 
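        # Hive-style LOAD DATA. A minimal sketch of how this surfaces through the
        # public API (an illustration, not part of this module), assuming the usual
        # top-level entry point:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.parse_one(
        #     ...     "LOAD DATA LOCAL INPATH '/data/x' OVERWRITE INTO TABLE t",
        #     ...     read="hive",
        #     ... )
        #
        # Any LOAD variant that does not start with DATA falls through to
        # _parse_as_command and round-trips verbatim as an exp.Command.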
2105 if self._match_text_seq("DATA"): 2106 local = self._match_text_seq("LOCAL") 2107 self._match_text_seq("INPATH") 2108 inpath = self._parse_string() 2109 overwrite = self._match(TokenType.OVERWRITE) 2110 self._match_pair(TokenType.INTO, TokenType.TABLE) 2111 2112 return self.expression( 2113 exp.LoadData, 2114 this=self._parse_table(schema=True), 2115 local=local, 2116 overwrite=overwrite, 2117 inpath=inpath, 2118 partition=self._parse_partition(), 2119 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2120 serde=self._match_text_seq("SERDE") and self._parse_string(), 2121 ) 2122 return self._parse_as_command(self._prev) 2123 2124 def _parse_delete(self) -> exp.Delete: 2125 # This handles MySQL's "Multiple-Table Syntax" 2126 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2127 tables = None 2128 comments = self._prev_comments 2129 if not self._match(TokenType.FROM, advance=False): 2130 tables = self._parse_csv(self._parse_table) or None 2131 2132 returning = self._parse_returning() 2133 2134 return self.expression( 2135 exp.Delete, 2136 comments=comments, 2137 tables=tables, 2138 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2139 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2140 where=self._parse_where(), 2141 returning=returning or self._parse_returning(), 2142 limit=self._parse_limit(), 2143 ) 2144 2145 def _parse_update(self) -> exp.Update: 2146 comments = self._prev_comments 2147 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2148 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2149 returning = self._parse_returning() 2150 return self.expression( 2151 exp.Update, 2152 comments=comments, 2153 **{ # type: ignore 2154 "this": this, 2155 "expressions": expressions, 2156 "from": self._parse_from(joins=True), 2157 "where": self._parse_where(), 2158 "returning": returning or self._parse_returning(), 2159 "order": self._parse_order(), 2160 "limit": self._parse_limit(), 2161 }, 2162 ) 2163 2164 def _parse_uncache(self) -> exp.Uncache: 2165 if not self._match(TokenType.TABLE): 2166 self.raise_error("Expecting TABLE after UNCACHE") 2167 2168 return self.expression( 2169 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2170 ) 2171 2172 def _parse_cache(self) -> exp.Cache: 2173 lazy = self._match_text_seq("LAZY") 2174 self._match(TokenType.TABLE) 2175 table = self._parse_table(schema=True) 2176 2177 options = [] 2178 if self._match_text_seq("OPTIONS"): 2179 self._match_l_paren() 2180 k = self._parse_string() 2181 self._match(TokenType.EQ) 2182 v = self._parse_string() 2183 options = [k, v] 2184 self._match_r_paren() 2185 2186 self._match(TokenType.ALIAS) 2187 return self.expression( 2188 exp.Cache, 2189 this=table, 2190 lazy=lazy, 2191 options=options, 2192 expression=self._parse_select(nested=True), 2193 ) 2194 2195 def _parse_partition(self) -> t.Optional[exp.Partition]: 2196 if not self._match(TokenType.PARTITION): 2197 return None 2198 2199 return self.expression( 2200 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2201 ) 2202 2203 def _parse_value(self) -> exp.Tuple: 2204 if self._match(TokenType.L_PAREN): 2205 expressions = self._parse_csv(self._parse_expression) 2206 self._match_r_paren() 2207 return self.expression(exp.Tuple, expressions=expressions) 2208 2209 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
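        # For instance (an illustration, not from this module),
        # parse_one("VALUES 1, 2", read="presto") yields an exp.Values whose
        # expressions are two single-element tuples.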
2210 # https://prestodb.io/docs/current/sql/values.html 2211 return self.expression(exp.Tuple, expressions=[self._parse_expression()]) 2212 2213 def _parse_projections(self) -> t.List[exp.Expression]: 2214 return self._parse_expressions() 2215 2216 def _parse_select( 2217 self, 2218 nested: bool = False, 2219 table: bool = False, 2220 parse_subquery_alias: bool = True, 2221 parse_set_operation: bool = True, 2222 ) -> t.Optional[exp.Expression]: 2223 cte = self._parse_with() 2224 2225 if cte: 2226 this = self._parse_statement() 2227 2228 if not this: 2229 self.raise_error("Failed to parse any statement following CTE") 2230 return cte 2231 2232 if "with" in this.arg_types: 2233 this.set("with", cte) 2234 else: 2235 self.raise_error(f"{this.key} does not support CTE") 2236 this = cte 2237 2238 return this 2239 2240 # duckdb supports leading with FROM x 2241 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2242 2243 if self._match(TokenType.SELECT): 2244 comments = self._prev_comments 2245 2246 hint = self._parse_hint() 2247 all_ = self._match(TokenType.ALL) 2248 distinct = self._match_set(self.DISTINCT_TOKENS) 2249 2250 kind = ( 2251 self._match(TokenType.ALIAS) 2252 and self._match_texts(("STRUCT", "VALUE")) 2253 and self._prev.text.upper() 2254 ) 2255 2256 if distinct: 2257 distinct = self.expression( 2258 exp.Distinct, 2259 on=self._parse_value() if self._match(TokenType.ON) else None, 2260 ) 2261 2262 if all_ and distinct: 2263 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2264 2265 limit = self._parse_limit(top=True) 2266 projections = self._parse_projections() 2267 2268 this = self.expression( 2269 exp.Select, 2270 kind=kind, 2271 hint=hint, 2272 distinct=distinct, 2273 expressions=projections, 2274 limit=limit, 2275 ) 2276 this.comments = comments 2277 2278 into = self._parse_into() 2279 if into: 2280 this.set("into", into) 2281 2282 if not from_: 2283 from_ = self._parse_from() 2284 2285 if from_: 2286 this.set("from", from_) 2287 2288 this = self._parse_query_modifiers(this) 2289 elif (table or nested) and self._match(TokenType.L_PAREN): 2290 if self._match(TokenType.PIVOT): 2291 this = self._parse_simplified_pivot() 2292 elif self._match(TokenType.FROM): 2293 this = exp.select("*").from_( 2294 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2295 ) 2296 else: 2297 this = ( 2298 self._parse_table() 2299 if table 2300 else self._parse_select(nested=True, parse_set_operation=False) 2301 ) 2302 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2303 2304 self._match_r_paren() 2305 2306 # We return early here so that the UNION isn't attached to the subquery by the 2307 # following call to _parse_set_operations, but instead becomes the parent node 2308 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2309 elif self._match(TokenType.VALUES): 2310 this = self.expression( 2311 exp.Values, 2312 expressions=self._parse_csv(self._parse_value), 2313 alias=self._parse_table_alias(), 2314 ) 2315 elif from_: 2316 this = exp.select("*").from_(from_.this, copy=False) 2317 else: 2318 this = None 2319 2320 if parse_set_operation: 2321 return self._parse_set_operations(this) 2322 return this 2323 2324 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2325 if not skip_with_token and not self._match(TokenType.WITH): 2326 return None 2327 2328 comments = self._prev_comments 2329 recursive = self._match(TokenType.RECURSIVE) 2330 2331 expressions = [] 2332 while True: 2333 
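            # Each iteration consumes one CTE; besides the standard comma separator,
            # the loop below also tolerates a repeated WITH keyword between CTEs, so
            # both "WITH a AS (...), b AS (...)" and "WITH a AS (...) WITH b AS (...)"
            # parse.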
expressions.append(self._parse_cte()) 2334 2335 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2336 break 2337 else: 2338 self._match(TokenType.WITH) 2339 2340 return self.expression( 2341 exp.With, comments=comments, expressions=expressions, recursive=recursive 2342 ) 2343 2344 def _parse_cte(self) -> exp.CTE: 2345 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2346 if not alias or not alias.this: 2347 self.raise_error("Expected CTE to have alias") 2348 2349 self._match(TokenType.ALIAS) 2350 return self.expression( 2351 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2352 ) 2353 2354 def _parse_table_alias( 2355 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2356 ) -> t.Optional[exp.TableAlias]: 2357 any_token = self._match(TokenType.ALIAS) 2358 alias = ( 2359 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2360 or self._parse_string_as_identifier() 2361 ) 2362 2363 index = self._index 2364 if self._match(TokenType.L_PAREN): 2365 columns = self._parse_csv(self._parse_function_parameter) 2366 self._match_r_paren() if columns else self._retreat(index) 2367 else: 2368 columns = None 2369 2370 if not alias and not columns: 2371 return None 2372 2373 return self.expression(exp.TableAlias, this=alias, columns=columns) 2374 2375 def _parse_subquery( 2376 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2377 ) -> t.Optional[exp.Subquery]: 2378 if not this: 2379 return None 2380 2381 return self.expression( 2382 exp.Subquery, 2383 this=this, 2384 pivots=self._parse_pivots(), 2385 alias=self._parse_table_alias() if parse_alias else None, 2386 ) 2387 2388 def _parse_query_modifiers( 2389 self, this: t.Optional[exp.Expression] 2390 ) -> t.Optional[exp.Expression]: 2391 if isinstance(this, self.MODIFIABLES): 2392 for join in iter(self._parse_join, None): 2393 this.append("joins", join) 2394 for lateral in iter(self._parse_lateral, None): 2395 this.append("laterals", lateral) 2396 2397 while True: 2398 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2399 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2400 key, expression = parser(self) 2401 2402 if expression: 2403 this.set(key, expression) 2404 if key == "limit": 2405 offset = expression.args.pop("offset", None) 2406 if offset: 2407 this.set("offset", exp.Offset(expression=offset)) 2408 continue 2409 break 2410 return this 2411 2412 def _parse_hint(self) -> t.Optional[exp.Hint]: 2413 if self._match(TokenType.HINT): 2414 hints = [] 2415 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2416 hints.extend(hint) 2417 2418 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2419 self.raise_error("Expected */ after HINT") 2420 2421 return self.expression(exp.Hint, expressions=hints) 2422 2423 return None 2424 2425 def _parse_into(self) -> t.Optional[exp.Into]: 2426 if not self._match(TokenType.INTO): 2427 return None 2428 2429 temp = self._match(TokenType.TEMPORARY) 2430 unlogged = self._match_text_seq("UNLOGGED") 2431 self._match(TokenType.TABLE) 2432 2433 return self.expression( 2434 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2435 ) 2436 2437 def _parse_from( 2438 self, joins: bool = False, skip_from_token: bool = False 2439 ) -> t.Optional[exp.From]: 2440 if not skip_from_token and not self._match(TokenType.FROM): 2441 return None 2442 2443 return self.expression( 2444 exp.From, comments=self._prev_comments, 
this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

        while self._match(TokenType.DOT):
            this = exp.Dot(
                this=this,
                expression=self._parse_function() or self._parse_id_var(any_token=False),
            )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] =
self.expression( 2563 exp.TableAlias, this=table, columns=columns 2564 ) 2565 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2566 # We move the alias from the lateral's child node to the lateral itself 2567 table_alias = this.args["alias"].pop() 2568 else: 2569 table_alias = self._parse_table_alias() 2570 2571 return self.expression( 2572 exp.Lateral, 2573 this=this, 2574 view=view, 2575 outer=outer, 2576 alias=table_alias, 2577 cross_apply=cross_apply, 2578 ) 2579 2580 def _parse_join_parts( 2581 self, 2582 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2583 return ( 2584 self._match_set(self.JOIN_METHODS) and self._prev, 2585 self._match_set(self.JOIN_SIDES) and self._prev, 2586 self._match_set(self.JOIN_KINDS) and self._prev, 2587 ) 2588 2589 def _parse_join( 2590 self, skip_join_token: bool = False, parse_bracket: bool = False 2591 ) -> t.Optional[exp.Join]: 2592 if self._match(TokenType.COMMA): 2593 return self.expression(exp.Join, this=self._parse_table()) 2594 2595 index = self._index 2596 method, side, kind = self._parse_join_parts() 2597 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2598 join = self._match(TokenType.JOIN) 2599 2600 if not skip_join_token and not join: 2601 self._retreat(index) 2602 kind = None 2603 method = None 2604 side = None 2605 2606 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2607 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2608 2609 if not skip_join_token and not join and not outer_apply and not cross_apply: 2610 return None 2611 2612 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2613 2614 if method: 2615 kwargs["method"] = method.text 2616 if side: 2617 kwargs["side"] = side.text 2618 if kind: 2619 kwargs["kind"] = kind.text 2620 if hint: 2621 kwargs["hint"] = hint 2622 2623 if self._match(TokenType.ON): 2624 kwargs["on"] = self._parse_conjunction() 2625 elif self._match(TokenType.USING): 2626 kwargs["using"] = self._parse_wrapped_id_vars() 2627 elif not (kind and kind.token_type == TokenType.CROSS): 2628 index = self._index 2629 join = self._parse_join() 2630 2631 if join and self._match(TokenType.ON): 2632 kwargs["on"] = self._parse_conjunction() 2633 elif join and self._match(TokenType.USING): 2634 kwargs["using"] = self._parse_wrapped_id_vars() 2635 else: 2636 join = None 2637 self._retreat(index) 2638 2639 kwargs["this"].set("joins", [join] if join else None) 2640 2641 comments = [c for token in (method, side, kind) if token for c in token.comments] 2642 return self.expression(exp.Join, comments=comments, **kwargs) 2643 2644 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2645 this = self._parse_conjunction() 2646 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2647 return this 2648 2649 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2650 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2651 2652 return this 2653 2654 def _parse_index( 2655 self, 2656 index: t.Optional[exp.Expression] = None, 2657 ) -> t.Optional[exp.Index]: 2658 if index: 2659 unique = None 2660 primary = None 2661 amp = None 2662 2663 self._match(TokenType.ON) 2664 self._match(TokenType.TABLE) # hive 2665 table = self._parse_table_parts(schema=True) 2666 else: 2667 unique = self._match(TokenType.UNIQUE) 2668 primary = self._match_text_seq("PRIMARY") 2669 amp = self._match_text_seq("AMP") 2670 2671 if not self._match(TokenType.INDEX): 2672 
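                # UNIQUE / PRIMARY / AMP were matched speculatively; without the INDEX
                # keyword itself this is not an index definition, so report failure.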
return None 2673 2674 index = self._parse_id_var() 2675 table = None 2676 2677 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2678 2679 if self._match(TokenType.L_PAREN, advance=False): 2680 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2681 else: 2682 columns = None 2683 2684 return self.expression( 2685 exp.Index, 2686 this=index, 2687 table=table, 2688 using=using, 2689 columns=columns, 2690 unique=unique, 2691 primary=primary, 2692 amp=amp, 2693 partition_by=self._parse_partition_by(), 2694 where=self._parse_where(), 2695 ) 2696 2697 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2698 hints: t.List[exp.Expression] = [] 2699 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2700 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2701 hints.append( 2702 self.expression( 2703 exp.WithTableHint, 2704 expressions=self._parse_csv( 2705 lambda: self._parse_function() or self._parse_var(any_token=True) 2706 ), 2707 ) 2708 ) 2709 self._match_r_paren() 2710 else: 2711 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2712 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2713 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2714 2715 self._match_texts(("INDEX", "KEY")) 2716 if self._match(TokenType.FOR): 2717 hint.set("target", self._advance_any() and self._prev.text.upper()) 2718 2719 hint.set("expressions", self._parse_wrapped_id_vars()) 2720 hints.append(hint) 2721 2722 return hints or None 2723 2724 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2725 return ( 2726 (not schema and self._parse_function(optional_parens=False)) 2727 or self._parse_id_var(any_token=False) 2728 or self._parse_string_as_identifier() 2729 or self._parse_placeholder() 2730 ) 2731 2732 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2733 catalog = None 2734 db = None 2735 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 2736 2737 while self._match(TokenType.DOT): 2738 if catalog: 2739 # This allows nesting the table in arbitrarily many dot expressions if needed 2740 table = self.expression( 2741 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2742 ) 2743 else: 2744 catalog = db 2745 db = table 2746 table = self._parse_table_part(schema=schema) or "" 2747 2748 if not table: 2749 self.raise_error(f"Expected table name but got {self._curr}") 2750 2751 return self.expression( 2752 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2753 ) 2754 2755 def _parse_table( 2756 self, 2757 schema: bool = False, 2758 joins: bool = False, 2759 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2760 parse_bracket: bool = False, 2761 ) -> t.Optional[exp.Expression]: 2762 lateral = self._parse_lateral() 2763 if lateral: 2764 return lateral 2765 2766 unnest = self._parse_unnest() 2767 if unnest: 2768 return unnest 2769 2770 values = self._parse_derived_table_values() 2771 if values: 2772 return values 2773 2774 subquery = self._parse_select(table=True) 2775 if subquery: 2776 if not subquery.args.get("pivots"): 2777 subquery.set("pivots", self._parse_pivots()) 2778 return subquery 2779 2780 bracket = parse_bracket and self._parse_bracket(None) 2781 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2782 this = t.cast( 2783 exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema)) 2784 ) 
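        # At this point `this` wraps an exp.Table built from dot-separated parts.
        # A quick sketch of the naming (an illustration, not from this module),
        # assuming the standard public helpers:
        #
        #     >>> import sqlglot
        #     >>> tbl = sqlglot.parse_one("SELECT * FROM c.d.t").find(sqlglot.exp.Table)
        #     >>> tbl.catalog, tbl.db, tbl.name
        #     ('c', 'd', 't')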
2785 2786 if schema: 2787 return self._parse_schema(this=this) 2788 2789 version = self._parse_version() 2790 2791 if version: 2792 this.set("version", version) 2793 2794 if self.dialect.ALIAS_POST_TABLESAMPLE: 2795 table_sample = self._parse_table_sample() 2796 2797 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2798 if alias: 2799 this.set("alias", alias) 2800 2801 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 2802 return self.expression( 2803 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 2804 ) 2805 2806 this.set("hints", self._parse_table_hints()) 2807 2808 if not this.args.get("pivots"): 2809 this.set("pivots", self._parse_pivots()) 2810 2811 if not self.dialect.ALIAS_POST_TABLESAMPLE: 2812 table_sample = self._parse_table_sample() 2813 2814 if table_sample: 2815 table_sample.set("this", this) 2816 this = table_sample 2817 2818 if joins: 2819 for join in iter(self._parse_join, None): 2820 this.append("joins", join) 2821 2822 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 2823 this.set("ordinality", True) 2824 this.set("alias", self._parse_table_alias()) 2825 2826 return this 2827 2828 def _parse_version(self) -> t.Optional[exp.Version]: 2829 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2830 this = "TIMESTAMP" 2831 elif self._match(TokenType.VERSION_SNAPSHOT): 2832 this = "VERSION" 2833 else: 2834 return None 2835 2836 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2837 kind = self._prev.text.upper() 2838 start = self._parse_bitwise() 2839 self._match_texts(("TO", "AND")) 2840 end = self._parse_bitwise() 2841 expression: t.Optional[exp.Expression] = self.expression( 2842 exp.Tuple, expressions=[start, end] 2843 ) 2844 elif self._match_text_seq("CONTAINED", "IN"): 2845 kind = "CONTAINED IN" 2846 expression = self.expression( 2847 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2848 ) 2849 elif self._match(TokenType.ALL): 2850 kind = "ALL" 2851 expression = None 2852 else: 2853 self._match_text_seq("AS", "OF") 2854 kind = "AS OF" 2855 expression = self._parse_type() 2856 2857 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2858 2859 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2860 if not self._match(TokenType.UNNEST): 2861 return None 2862 2863 expressions = self._parse_wrapped_csv(self._parse_equality) 2864 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2865 2866 alias = self._parse_table_alias() if with_alias else None 2867 2868 if alias: 2869 if self.dialect.UNNEST_COLUMN_ONLY: 2870 if alias.args.get("columns"): 2871 self.raise_error("Unexpected extra column alias in unnest.") 2872 2873 alias.set("columns", [alias.this]) 2874 alias.set("this", None) 2875 2876 columns = alias.args.get("columns") or [] 2877 if offset and len(expressions) < len(columns): 2878 offset = columns.pop() 2879 2880 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2881 self._match(TokenType.ALIAS) 2882 offset = self._parse_id_var( 2883 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2884 ) or exp.to_identifier("offset") 2885 2886 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2887 2888 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2889 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2890 if not is_derived and not self._match(TokenType.VALUES): 2891 return None 2892 2893 expressions = 
self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_conjunction()

            self._match(TokenType.ALIAS)
            alias = self._parse_field()
            if alias:
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")
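        # Entries in the IN list may carry aliases, e.g. (an illustration, not from
        # this module) "PIVOT (SUM(v) FOR k IN ('a' AS first, 'b' AS second))"; each
        # aliased entry is wrapped in an exp.PivotAlias by the helper above.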
3003 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3004 3005 self._match_r_paren() 3006 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3007 3008 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3009 index = self._index 3010 include_nulls = None 3011 3012 if self._match(TokenType.PIVOT): 3013 unpivot = False 3014 elif self._match(TokenType.UNPIVOT): 3015 unpivot = True 3016 3017 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3018 if self._match_text_seq("INCLUDE", "NULLS"): 3019 include_nulls = True 3020 elif self._match_text_seq("EXCLUDE", "NULLS"): 3021 include_nulls = False 3022 else: 3023 return None 3024 3025 expressions = [] 3026 3027 if not self._match(TokenType.L_PAREN): 3028 self._retreat(index) 3029 return None 3030 3031 if unpivot: 3032 expressions = self._parse_csv(self._parse_column) 3033 else: 3034 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3035 3036 if not expressions: 3037 self.raise_error("Failed to parse PIVOT's aggregation list") 3038 3039 if not self._match(TokenType.FOR): 3040 self.raise_error("Expecting FOR") 3041 3042 field = self._parse_pivot_in() 3043 3044 self._match_r_paren() 3045 3046 pivot = self.expression( 3047 exp.Pivot, 3048 expressions=expressions, 3049 field=field, 3050 unpivot=unpivot, 3051 include_nulls=include_nulls, 3052 ) 3053 3054 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3055 pivot.set("alias", self._parse_table_alias()) 3056 3057 if not unpivot: 3058 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3059 3060 columns: t.List[exp.Expression] = [] 3061 for fld in pivot.args["field"].expressions: 3062 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3063 for name in names: 3064 if self.PREFIXED_PIVOT_COLUMNS: 3065 name = f"{name}_{field_name}" if name else field_name 3066 else: 3067 name = f"{field_name}_{name}" if name else field_name 3068 3069 columns.append(exp.to_identifier(name)) 3070 3071 pivot.set("columns", columns) 3072 3073 return pivot 3074 3075 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3076 return [agg.alias for agg in aggregations] 3077 3078 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3079 if not skip_where_token and not self._match(TokenType.WHERE): 3080 return None 3081 3082 return self.expression( 3083 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3084 ) 3085 3086 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3087 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3088 return None 3089 3090 elements = defaultdict(list) 3091 3092 if self._match(TokenType.ALL): 3093 return self.expression(exp.Group, all=True) 3094 3095 while True: 3096 expressions = self._parse_csv(self._parse_conjunction) 3097 if expressions: 3098 elements["expressions"].extend(expressions) 3099 3100 grouping_sets = self._parse_grouping_sets() 3101 if grouping_sets: 3102 elements["grouping_sets"].extend(grouping_sets) 3103 3104 rollup = None 3105 cube = None 3106 totals = None 3107 3108 index = self._index 3109 with_ = self._match(TokenType.WITH) 3110 if self._match(TokenType.ROLLUP): 3111 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3112 elements["rollup"].extend(ensure_list(rollup)) 3113 3114 if self._match(TokenType.CUBE): 3115 cube = with_ or 
self._parse_wrapped_csv(self._parse_column) 3116 elements["cube"].extend(ensure_list(cube)) 3117 3118 if self._match_text_seq("TOTALS"): 3119 totals = True 3120 elements["totals"] = True # type: ignore 3121 3122 if not (grouping_sets or rollup or cube or totals): 3123 if with_: 3124 self._retreat(index) 3125 break 3126 3127 return self.expression(exp.Group, **elements) # type: ignore 3128 3129 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3130 if not self._match(TokenType.GROUPING_SETS): 3131 return None 3132 3133 return self._parse_wrapped_csv(self._parse_grouping_set) 3134 3135 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3136 if self._match(TokenType.L_PAREN): 3137 grouping_set = self._parse_csv(self._parse_column) 3138 self._match_r_paren() 3139 return self.expression(exp.Tuple, expressions=grouping_set) 3140 3141 return self._parse_column() 3142 3143 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3144 if not skip_having_token and not self._match(TokenType.HAVING): 3145 return None 3146 return self.expression(exp.Having, this=self._parse_conjunction()) 3147 3148 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3149 if not self._match(TokenType.QUALIFY): 3150 return None 3151 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3152 3153 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3154 if skip_start_token: 3155 start = None 3156 elif self._match(TokenType.START_WITH): 3157 start = self._parse_conjunction() 3158 else: 3159 return None 3160 3161 self._match(TokenType.CONNECT_BY) 3162 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3163 exp.Prior, this=self._parse_bitwise() 3164 ) 3165 connect = self._parse_conjunction() 3166 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3167 3168 if not start and self._match(TokenType.START_WITH): 3169 start = self._parse_conjunction() 3170 3171 return self.expression(exp.Connect, start=start, connect=connect) 3172 3173 def _parse_name_as_expression(self) -> exp.Alias: 3174 return self.expression( 3175 exp.Alias, 3176 alias=self._parse_id_var(any_token=True), 3177 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3178 ) 3179 3180 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3181 if self._match_text_seq("INTERPOLATE"): 3182 return self._parse_wrapped_csv(self._parse_name_as_expression) 3183 return None 3184 3185 def _parse_order( 3186 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3187 ) -> t.Optional[exp.Expression]: 3188 siblings = None 3189 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3190 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3191 return this 3192 3193 siblings = True 3194 3195 return self.expression( 3196 exp.Order, 3197 this=this, 3198 expressions=self._parse_csv(self._parse_ordered), 3199 interpolate=self._parse_interpolate(), 3200 siblings=siblings, 3201 ) 3202 3203 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3204 if not self._match(token): 3205 return None 3206 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3207 3208 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3209 this = parse_method() if parse_method else self._parse_conjunction() 3210 3211 asc = self._match(TokenType.ASC) 3212 desc = self._match(TokenType.DESC) or (asc and False) 3213 3214 is_nulls_first = 
self._match_text_seq("NULLS", "FIRST") 3215 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3216 3217 nulls_first = is_nulls_first or False 3218 explicitly_null_ordered = is_nulls_first or is_nulls_last 3219 3220 if ( 3221 not explicitly_null_ordered 3222 and ( 3223 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3224 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3225 ) 3226 and self.dialect.NULL_ORDERING != "nulls_are_last" 3227 ): 3228 nulls_first = True 3229 3230 if self._match_text_seq("WITH", "FILL"): 3231 with_fill = self.expression( 3232 exp.WithFill, 3233 **{ # type: ignore 3234 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3235 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3236 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3237 }, 3238 ) 3239 else: 3240 with_fill = None 3241 3242 return self.expression( 3243 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3244 ) 3245 3246 def _parse_limit( 3247 self, this: t.Optional[exp.Expression] = None, top: bool = False 3248 ) -> t.Optional[exp.Expression]: 3249 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3250 comments = self._prev_comments 3251 if top: 3252 limit_paren = self._match(TokenType.L_PAREN) 3253 expression = self._parse_term() if limit_paren else self._parse_number() 3254 3255 if limit_paren: 3256 self._match_r_paren() 3257 else: 3258 expression = self._parse_term() 3259 3260 if self._match(TokenType.COMMA): 3261 offset = expression 3262 expression = self._parse_term() 3263 else: 3264 offset = None 3265 3266 limit_exp = self.expression( 3267 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3268 ) 3269 3270 return limit_exp 3271 3272 if self._match(TokenType.FETCH): 3273 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3274 direction = self._prev.text.upper() if direction else "FIRST" 3275 3276 count = self._parse_field(tokens=self.FETCH_TOKENS) 3277 percent = self._match(TokenType.PERCENT) 3278 3279 self._match_set((TokenType.ROW, TokenType.ROWS)) 3280 3281 only = self._match_text_seq("ONLY") 3282 with_ties = self._match_text_seq("WITH", "TIES") 3283 3284 if only and with_ties: 3285 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3286 3287 return self.expression( 3288 exp.Fetch, 3289 direction=direction, 3290 count=count, 3291 percent=percent, 3292 with_ties=with_ties, 3293 ) 3294 3295 return this 3296 3297 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3298 if not self._match(TokenType.OFFSET): 3299 return this 3300 3301 count = self._parse_term() 3302 self._match_set((TokenType.ROW, TokenType.ROWS)) 3303 return self.expression(exp.Offset, this=this, expression=count) 3304 3305 def _parse_locks(self) -> t.List[exp.Lock]: 3306 locks = [] 3307 while True: 3308 if self._match_text_seq("FOR", "UPDATE"): 3309 update = True 3310 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3311 "LOCK", "IN", "SHARE", "MODE" 3312 ): 3313 update = False 3314 else: 3315 break 3316 3317 expressions = None 3318 if self._match_text_seq("OF"): 3319 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3320 3321 wait: t.Optional[bool | exp.Expression] = None 3322 if self._match_text_seq("NOWAIT"): 3323 wait = True 3324 elif self._match_text_seq("WAIT"): 3325 wait = self._parse_primary() 3326 elif self._match_text_seq("SKIP", "LOCKED"): 3327 wait = False 3328 3329 
locks.append( 3330 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3331 ) 3332 3333 return locks 3334 3335 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3336 while this and self._match_set(self.SET_OPERATIONS): 3337 token_type = self._prev.token_type 3338 3339 if token_type == TokenType.UNION: 3340 operation = exp.Union 3341 elif token_type == TokenType.EXCEPT: 3342 operation = exp.Except 3343 else: 3344 operation = exp.Intersect 3345 3346 comments = self._prev.comments 3347 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3348 by_name = self._match_text_seq("BY", "NAME") 3349 expression = self._parse_select(nested=True, parse_set_operation=False) 3350 3351 this = self.expression( 3352 operation, 3353 comments=comments, 3354 this=this, 3355 distinct=distinct, 3356 by_name=by_name, 3357 expression=expression, 3358 ) 3359 3360 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3361 expression = this.expression 3362 3363 if expression: 3364 for arg in self.UNION_MODIFIERS: 3365 expr = expression.args.get(arg) 3366 if expr: 3367 this.set(arg, expr.pop()) 3368 3369 return this 3370 3371 def _parse_expression(self) -> t.Optional[exp.Expression]: 3372 return self._parse_alias(self._parse_conjunction()) 3373 3374 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3375 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3376 3377 def _parse_equality(self) -> t.Optional[exp.Expression]: 3378 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3379 3380 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3381 return self._parse_tokens(self._parse_range, self.COMPARISON) 3382 3383 def _parse_range(self) -> t.Optional[exp.Expression]: 3384 this = self._parse_bitwise() 3385 negate = self._match(TokenType.NOT) 3386 3387 if self._match_set(self.RANGE_PARSERS): 3388 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3389 if not expression: 3390 return this 3391 3392 this = expression 3393 elif self._match(TokenType.ISNULL): 3394 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3395 3396 # Postgres supports ISNULL and NOTNULL for conditions. 
3397 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3398 if self._match(TokenType.NOTNULL): 3399 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3400 this = self.expression(exp.Not, this=this) 3401 3402 if negate: 3403 this = self.expression(exp.Not, this=this) 3404 3405 if self._match(TokenType.IS): 3406 this = self._parse_is(this) 3407 3408 return this 3409 3410 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3411 index = self._index - 1 3412 negate = self._match(TokenType.NOT) 3413 3414 if self._match_text_seq("DISTINCT", "FROM"): 3415 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3416 return self.expression(klass, this=this, expression=self._parse_conjunction()) 3417 3418 expression = self._parse_null() or self._parse_boolean() 3419 if not expression: 3420 self._retreat(index) 3421 return None 3422 3423 this = self.expression(exp.Is, this=this, expression=expression) 3424 return self.expression(exp.Not, this=this) if negate else this 3425 3426 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3427 unnest = self._parse_unnest(with_alias=False) 3428 if unnest: 3429 this = self.expression(exp.In, this=this, unnest=unnest) 3430 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3431 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3432 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3433 3434 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3435 this = self.expression(exp.In, this=this, query=expressions[0]) 3436 else: 3437 this = self.expression(exp.In, this=this, expressions=expressions) 3438 3439 if matched_l_paren: 3440 self._match_r_paren(this) 3441 elif not self._match(TokenType.R_BRACKET, expression=this): 3442 self.raise_error("Expecting ]") 3443 else: 3444 this = self.expression(exp.In, this=this, field=self._parse_field()) 3445 3446 return this 3447 3448 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3449 low = self._parse_bitwise() 3450 self._match(TokenType.AND) 3451 high = self._parse_bitwise() 3452 return self.expression(exp.Between, this=this, low=low, high=high) 3453 3454 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3455 if not self._match(TokenType.ESCAPE): 3456 return this 3457 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3458 3459 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3460 index = self._index 3461 3462 if not self._match(TokenType.INTERVAL) and match_interval: 3463 return None 3464 3465 if self._match(TokenType.STRING, advance=False): 3466 this = self._parse_primary() 3467 else: 3468 this = self._parse_term() 3469 3470 if not this or ( 3471 isinstance(this, exp.Column) 3472 and not this.table 3473 and not this.this.quoted 3474 and this.name.upper() == "IS" 3475 ): 3476 self._retreat(index) 3477 return None 3478 3479 unit = self._parse_function() or ( 3480 not self._match(TokenType.ALIAS, advance=False) 3481 and self._parse_var(any_token=True, upper=True) 3482 ) 3483 3484 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3485 # each INTERVAL expression into this canonical form so it's easy to transpile 3486 if this and this.is_number: 3487 this = exp.Literal.string(this.name) 3488 elif this and this.is_string: 3489 parts = this.name.split() 3490 3491 if len(parts) == 2: 3492 if unit: 3493 # This 
    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1].upper())

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        if self.EXPONENT:
            factor = self._parse_tokens(self._parse_exponent, self.FACTOR)
        else:
            factor = self._parse_tokens(self._parse_unary, self.FACTOR)

        if isinstance(factor, exp.Div):
            factor.args["typed"] = self.dialect.TYPED_DIVISION
            factor.args["safe"] = self.dialect.SAFE_DIVISION

        return factor

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())
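    # Sketch of the folding done by _parse_bitwise above (indicative only):
    #
    #   a | b   -> BitwiseOr(a, b)         via self.BITWISE
    #   a || b  -> DPipe(a, b)             when DPIPE_IS_STRING_CONCAT
    #   a << 2  -> BitwiseLeftShift(a, 2)  matched as an LT/LT token pair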
    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()

        if interval:
            # Convert INTERVAL 'val_1' unit_1 ... 'val_n' unit_n into a sum of intervals
            while self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
                interval = self.expression(  # type: ignore
                    exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
                )

            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )
    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        index = self._index
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        column_def = self._parse_column_def(this)

        if type_required and (
            (isinstance(this, exp.Column) and this.this is column_def) or this is column_def
        ):
            self._retreat(index)
            return self._parse_types()

        return column_def

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)
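    # Indicative results of _parse_types for parameterized and nested types
    # (tree shapes are a sketch; exact reprs depend on the sqlglot version):
    #
    #   DECIMAL(10, 2)       -> DataType(this=Type.DECIMAL, expressions=[10, 2])
    #   ARRAY<STRUCT<a INT>> -> DataType(this=Type.ARRAY, nested=True,
    #                                    expressions=[DataType(this=Type.STRUCT, ...)])
    #   INT[]                -> the same ARRAY wrapping, via the trailing [] loop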
    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...), SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )
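    # Sketch of the column-operator folding above (indicative only):
    #
    #   x::int   -> Cast(this=x, to=DataType(INT))    DCOLON branch
    #   a.b.c    -> Column(this=c, table=b, db=a)     dots re-slot the name parts
    #   'a' 'b'  -> Concat(expressions=['a', 'b'])    adjacent strings, _parse_primary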
    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this

                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)
    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
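    # Indicative ColumnDef shapes produced above (a sketch; constraint nesting
    # may differ slightly between sqlglot versions):
    #
    #   a INT NOT NULL -> ColumnDef(this=a, kind=DataType(INT),
    #                               constraints=[ColumnConstraint(kind=NotNullColumnConstraint())])
    #   b AS (a + 1)   -> ColumnDef(this=b, constraints=[ComputedColumnConstraint(a + 1)])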
    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None
    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
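    # The options collected by _parse_key_constraint_options are kept as plain
    # strings rather than expression nodes. Indicatively:
    #
    #   REFERENCES t (id) ON DELETE CASCADE NOT ENFORCED
    #   -> Reference(this=t(id), options=["ON DELETE CASCADE", "NOT ENFORCED"])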
    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this
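    # Sketch of _parse_bracket outcomes (indicative; apply_index_offset shifts
    # indices for dialects whose arrays are not 0-based):
    #
    #   [1, 2]    -> Array(expressions=[1, 2])
    #   x[0]      -> Bracket(this=x, expressions=[0])
    #   {'a': 1}  -> Struct(expressions=[...])   DuckDB-style brace syntax, with
    #                key-value pairs parsed via _parse_bracket_key_value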
    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)
self.raise_error("Expected TYPE after CAST") 4500 elif isinstance(to, exp.Identifier): 4501 to = exp.DataType.build(to.name, udt=True) 4502 elif to.this == exp.DataType.Type.CHAR: 4503 if self._match(TokenType.CHARACTER_SET): 4504 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4505 4506 return self.expression( 4507 exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe 4508 ) 4509 4510 def _parse_string_agg(self) -> exp.Expression: 4511 if self._match(TokenType.DISTINCT): 4512 args: t.List[t.Optional[exp.Expression]] = [ 4513 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4514 ] 4515 if self._match(TokenType.COMMA): 4516 args.extend(self._parse_csv(self._parse_conjunction)) 4517 else: 4518 args = self._parse_csv(self._parse_conjunction) # type: ignore 4519 4520 index = self._index 4521 if not self._match(TokenType.R_PAREN) and args: 4522 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4523 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4524 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4525 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4526 4527 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4528 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4529 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4530 if not self._match_text_seq("WITHIN", "GROUP"): 4531 self._retreat(index) 4532 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4533 4534 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4535 order = self._parse_order(this=seq_get(args, 0)) 4536 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4537 4538 def _parse_convert( 4539 self, strict: bool, safe: t.Optional[bool] = None 4540 ) -> t.Optional[exp.Expression]: 4541 this = self._parse_bitwise() 4542 4543 if self._match(TokenType.USING): 4544 to: t.Optional[exp.Expression] = self.expression( 4545 exp.CharacterSet, this=self._parse_var() 4546 ) 4547 elif self._match(TokenType.COMMA): 4548 to = self._parse_types() 4549 else: 4550 to = None 4551 4552 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4553 4554 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4555 """ 4556 There are generally two variants of the DECODE function: 4557 4558 - DECODE(bin, charset) 4559 - DECODE(expression, search, result [, search, result] ... [, default]) 4560 4561 The second variant will always be parsed into a CASE expression. Note that NULL 4562 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4563 instead of relying on pattern matching. 
4564 """ 4565 args = self._parse_csv(self._parse_conjunction) 4566 4567 if len(args) < 3: 4568 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4569 4570 expression, *expressions = args 4571 if not expression: 4572 return None 4573 4574 ifs = [] 4575 for search, result in zip(expressions[::2], expressions[1::2]): 4576 if not search or not result: 4577 return None 4578 4579 if isinstance(search, exp.Literal): 4580 ifs.append( 4581 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4582 ) 4583 elif isinstance(search, exp.Null): 4584 ifs.append( 4585 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4586 ) 4587 else: 4588 cond = exp.or_( 4589 exp.EQ(this=expression.copy(), expression=search), 4590 exp.and_( 4591 exp.Is(this=expression.copy(), expression=exp.Null()), 4592 exp.Is(this=search.copy(), expression=exp.Null()), 4593 copy=False, 4594 ), 4595 copy=False, 4596 ) 4597 ifs.append(exp.If(this=cond, true=result)) 4598 4599 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4600 4601 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4602 self._match_text_seq("KEY") 4603 key = self._parse_column() 4604 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 4605 self._match_text_seq("VALUE") 4606 value = self._parse_bitwise() 4607 4608 if not key and not value: 4609 return None 4610 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4611 4612 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4613 if not this or not self._match_text_seq("FORMAT", "JSON"): 4614 return this 4615 4616 return self.expression(exp.FormatJson, this=this) 4617 4618 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4619 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 4620 for value in values: 4621 if self._match_text_seq(value, "ON", on): 4622 return f"{value} ON {on}" 4623 4624 return None 4625 4626 @t.overload 4627 def _parse_json_object(self, agg: Literal[False]) -> exp.JSONObject: 4628 ... 4629 4630 @t.overload 4631 def _parse_json_object(self, agg: Literal[True]) -> exp.JSONObjectAgg: 4632 ... 
    @t.overload
    def _parse_json_object(self, agg: Literal[False]) -> exp.JSONObject:
        ...

    @t.overload
    def _parse_json_object(self, agg: Literal[True]) -> exp.JSONObjectAgg:
        ...

    def _parse_json_object(self, agg=False):
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )
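    # Indicative JSONObject shape for the clauses parsed above (a sketch):
    #
    #   JSON_OBJECT('a' VALUE 1 ABSENT ON NULL WITH UNIQUE KEYS)
    #   -> JSONObject(expressions=[JSONKeyValue(this='a', expression=1)],
    #                 null_handling='ABSENT ON NULL', unique_keys=True)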
    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)
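    # Sketch of the TRIM argument reordering above: in TRIM(LEADING 'x' FROM y)
    # the pattern precedes the operand, so the two are swapped to keep `this`
    # pointing at the operand (indicative):
    #
    #   TRIM(LEADING 'x' FROM y) -> Trim(this=y, expression='x', position='LEADING')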
self._match_text_seq("RESPECT", "NULLS"): 4841 return self.expression(exp.RespectNulls, this=this) 4842 return this 4843 4844 def _parse_window( 4845 self, this: t.Optional[exp.Expression], alias: bool = False 4846 ) -> t.Optional[exp.Expression]: 4847 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4848 self._match(TokenType.WHERE) 4849 this = self.expression( 4850 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4851 ) 4852 self._match_r_paren() 4853 4854 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4855 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4856 if self._match_text_seq("WITHIN", "GROUP"): 4857 order = self._parse_wrapped(self._parse_order) 4858 this = self.expression(exp.WithinGroup, this=this, expression=order) 4859 4860 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4861 # Some dialects choose to implement and some do not. 4862 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4863 4864 # There is some code above in _parse_lambda that handles 4865 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4866 4867 # The below changes handle 4868 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4869 4870 # Oracle allows both formats 4871 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4872 # and Snowflake chose to do the same for familiarity 4873 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4874 this = self._parse_respect_or_ignore_nulls(this) 4875 4876 # bigquery select from window x AS (partition by ...) 4877 if alias: 4878 over = None 4879 self._match(TokenType.ALIAS) 4880 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4881 return this 4882 else: 4883 over = self._prev.text.upper() 4884 4885 if not self._match(TokenType.L_PAREN): 4886 return self.expression( 4887 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4888 ) 4889 4890 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4891 4892 first = self._match(TokenType.FIRST) 4893 if self._match_text_seq("LAST"): 4894 first = False 4895 4896 partition, order = self._parse_partition_and_order() 4897 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4898 4899 if kind: 4900 self._match(TokenType.BETWEEN) 4901 start = self._parse_window_spec() 4902 self._match(TokenType.AND) 4903 end = self._parse_window_spec() 4904 4905 spec = self.expression( 4906 exp.WindowSpec, 4907 kind=kind, 4908 start=start["value"], 4909 start_side=start["side"], 4910 end=end["value"], 4911 end_side=end["side"], 4912 ) 4913 else: 4914 spec = None 4915 4916 self._match_r_paren() 4917 4918 window = self.expression( 4919 exp.Window, 4920 this=this, 4921 partition_by=partition, 4922 order=order, 4923 spec=spec, 4924 alias=window_alias, 4925 over=over, 4926 first=first, 4927 ) 4928 4929 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and this.this.comments:
                this.comments = this.this.comments
                this.this.comments = None

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set((TokenType.STRING, TokenType.RAW_STRING)):
            return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()
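    # Sketch of the comment relocation in _parse_alias above: in
    # "expr /* note */ AS alias" the comment is attached to `expr` during
    # tokenization, so it is hoisted onto the Alias node to keep it next to
    # the aliased expression in the generated SQL. Indicatively:
    #
    #   SELECT a /* note */ AS b -> Alias(this=a, alias=b) carrying comments=[' note ']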
    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        def _parse_parameter_part() -> t.Optional[exp.Expression]:
            return (
                self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True)
            )

        self._match(TokenType.L_BRACE)
        this = _parse_parameter_part()
        expression = self._match(TokenType.COLON) and _parse_parameter_part()
        self._match(TokenType.R_BRACE)

        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()

            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this
    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")

        parse_result = parse_method()

        if wrapped:
            self._match_r_paren()

        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop
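    # Indicative result for the Databricks-style column positioning handled in
    # _parse_add_column above (a sketch):
    #
    #   ADD COLUMN c INT AFTER a
    #   -> ColumnDef(this=c, kind=DataType(INT),
    #                position=ColumnPosition(this=a, position='AFTER'))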
    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
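The ALTER helpers above are reached through the ALTER_PARSERS table. An illustrative round trip through the public API (the argument names are hedged, since tree shapes can change across versions):

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("ALTER TABLE t ADD COLUMN c INT FIRST")
assert isinstance(ast, exp.AlterTable)
column_def = ast.args["actions"][0]          # parsed by _parse_add_column
position = column_def.args.get("position")   # exp.ColumnPosition for FIRST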
    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )
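A quick illustrative check of _parse_merge and _parse_when_matched via the public API (the node layout may vary by version):

import sqlglot
from sqlglot import exp

merge = sqlglot.parse_one(
    "MERGE INTO t USING s ON t.id = s.id "
    "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
)
assert isinstance(merge, exp.Merge)
whens = merge.expressions  # the exp.When nodes built by _parse_when_matched
assert len(whens) == 2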
    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        # Consume everything up to the end of the statement and preserve the
        # raw SQL text in an opaque exp.Command node
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None
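_parse_set_item and _parse_show both dispatch through tries that the _Parser metaclass builds from SET_PARSERS and SHOW_PARSERS. A hypothetical subclass registering multi-word SHOW keys might look like the sketch below; note that the base class never routes TokenType.SHOW here, so a dialect (MySQL does this, for example) also has to wire _parse_show into STATEMENT_PARSERS:

# Hypothetical sketch: the metaclass flattens these keys into SHOW_TRIE, so
# _find_parser can match "CREATE TABLE" across two consecutive tokens.
class SketchShowParser(Parser):
    SHOW_PARSERS = {
        "TABLES": lambda self: self.expression(exp.Show, this="TABLES"),
        "CREATE TABLE": lambda self: self.expression(exp.Show, this="CREATE TABLE"),
    }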
    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...
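The _match* family implements speculative parsing: record self._index, try to consume tokens, and _retreat when a partial match has to be undone (note that _match_text_seq already retreats internally on failure). A hypothetical method following that discipline:

# Hypothetical sketch of the save-index / match / retreat pattern.
class SketchMatchParser(Parser):
    def _parse_with_example(self) -> t.Optional[exp.Expression]:
        index = self._index
        if self._match(TokenType.WITH) and self._match_text_seq("EXAMPLE"):
            return exp.var("WITH EXAMPLE")
        self._retreat(index)  # undo a lone WITH match
        return None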
    def _replace_columns_with_dots(self, this):
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)

        return node
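_replace_lambda is what turns lambda parameters from ordinary column references into bare identifiers. An illustrative use via the public API; MY_UDF is a made-up function name, and the generic TokenType.ARROW rule in LAMBDAS handles the lambda even for unknown functions:

import sqlglot
from sqlglot import exp

select = sqlglot.parse_one("SELECT MY_UDF(x -> x + 1)")
lam = select.find(exp.Lambda)
assert lam is not None
# Inside the body `x + 1`, `x` is a bare identifier rather than an exp.Column,
# because _replace_lambda rewrote it when the lambda was assembled.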
24def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 25 if len(args) == 1 and args[0].is_star: 26 return exp.StarMap(this=args[0]) 27 28 keys = [] 29 values = [] 30 for i in range(0, len(args), 2): 31 keys.append(args[i]) 32 values.append(args[i + 1]) 33 34 return exp.VarMap( 35 keys=exp.Array(expressions=keys), 36 values=exp.Array(expressions=values), 37 )
53def parse_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 54 # Default argument order is base, expression 55 this = seq_get(args, 0) 56 expression = seq_get(args, 1) 57 58 if expression: 59 if not dialect.LOG_BASE_FIRST: 60 this, expression = expression, this 61 return exp.Log(this=this, expression=expression) 62 63 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
76class Parser(metaclass=_Parser): 77 """ 78 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 79 80 Args: 81 error_level: The desired error level. 82 Default: ErrorLevel.IMMEDIATE 83 error_message_context: Determines the amount of context to capture from a 84 query string when displaying the error message (in number of characters). 85 Default: 100 86 max_errors: Maximum number of error messages to include in a raised ParseError. 87 This is only relevant if error_level is ErrorLevel.RAISE. 88 Default: 3 89 """ 90 91 FUNCTIONS: t.Dict[str, t.Callable] = { 92 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 93 "CONCAT": lambda args, dialect: exp.Concat( 94 expressions=args, 95 safe=not dialect.STRICT_STRING_CONCAT, 96 coalesce=dialect.CONCAT_COALESCE, 97 ), 98 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 99 expressions=args, 100 safe=not dialect.STRICT_STRING_CONCAT, 101 coalesce=dialect.CONCAT_COALESCE, 102 ), 103 "DATE_TO_DATE_STR": lambda args: exp.Cast( 104 this=seq_get(args, 0), 105 to=exp.DataType(this=exp.DataType.Type.TEXT), 106 ), 107 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 108 "LIKE": parse_like, 109 "LOG": parse_logarithm, 110 "TIME_TO_TIME_STR": lambda args: exp.Cast( 111 this=seq_get(args, 0), 112 to=exp.DataType(this=exp.DataType.Type.TEXT), 113 ), 114 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 115 this=exp.Cast( 116 this=seq_get(args, 0), 117 to=exp.DataType(this=exp.DataType.Type.TEXT), 118 ), 119 start=exp.Literal.number(1), 120 length=exp.Literal.number(10), 121 ), 122 "VAR_MAP": parse_var_map, 123 } 124 125 NO_PAREN_FUNCTIONS = { 126 TokenType.CURRENT_DATE: exp.CurrentDate, 127 TokenType.CURRENT_DATETIME: exp.CurrentDate, 128 TokenType.CURRENT_TIME: exp.CurrentTime, 129 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 130 TokenType.CURRENT_USER: exp.CurrentUser, 131 } 132 133 STRUCT_TYPE_TOKENS = { 134 TokenType.NESTED, 135 TokenType.STRUCT, 136 } 137 138 NESTED_TYPE_TOKENS = { 139 TokenType.ARRAY, 140 TokenType.LOWCARDINALITY, 141 TokenType.MAP, 142 TokenType.NULLABLE, 143 *STRUCT_TYPE_TOKENS, 144 } 145 146 ENUM_TYPE_TOKENS = { 147 TokenType.ENUM, 148 TokenType.ENUM8, 149 TokenType.ENUM16, 150 } 151 152 TYPE_TOKENS = { 153 TokenType.BIT, 154 TokenType.BOOLEAN, 155 TokenType.TINYINT, 156 TokenType.UTINYINT, 157 TokenType.SMALLINT, 158 TokenType.USMALLINT, 159 TokenType.INT, 160 TokenType.UINT, 161 TokenType.BIGINT, 162 TokenType.UBIGINT, 163 TokenType.INT128, 164 TokenType.UINT128, 165 TokenType.INT256, 166 TokenType.UINT256, 167 TokenType.MEDIUMINT, 168 TokenType.UMEDIUMINT, 169 TokenType.FIXEDSTRING, 170 TokenType.FLOAT, 171 TokenType.DOUBLE, 172 TokenType.CHAR, 173 TokenType.NCHAR, 174 TokenType.VARCHAR, 175 TokenType.NVARCHAR, 176 TokenType.TEXT, 177 TokenType.MEDIUMTEXT, 178 TokenType.LONGTEXT, 179 TokenType.MEDIUMBLOB, 180 TokenType.LONGBLOB, 181 TokenType.BINARY, 182 TokenType.VARBINARY, 183 TokenType.JSON, 184 TokenType.JSONB, 185 TokenType.INTERVAL, 186 TokenType.TINYBLOB, 187 TokenType.TINYTEXT, 188 TokenType.TIME, 189 TokenType.TIMETZ, 190 TokenType.TIMESTAMP, 191 TokenType.TIMESTAMP_S, 192 TokenType.TIMESTAMP_MS, 193 TokenType.TIMESTAMP_NS, 194 TokenType.TIMESTAMPTZ, 195 TokenType.TIMESTAMPLTZ, 196 TokenType.DATETIME, 197 TokenType.DATETIME64, 198 TokenType.DATE, 199 TokenType.DATE32, 200 TokenType.INT4RANGE, 201 TokenType.INT4MULTIRANGE, 202 TokenType.INT8RANGE, 203 TokenType.INT8MULTIRANGE, 204 TokenType.NUMRANGE, 205 
TokenType.NUMMULTIRANGE, 206 TokenType.TSRANGE, 207 TokenType.TSMULTIRANGE, 208 TokenType.TSTZRANGE, 209 TokenType.TSTZMULTIRANGE, 210 TokenType.DATERANGE, 211 TokenType.DATEMULTIRANGE, 212 TokenType.DECIMAL, 213 TokenType.UDECIMAL, 214 TokenType.BIGDECIMAL, 215 TokenType.UUID, 216 TokenType.GEOGRAPHY, 217 TokenType.GEOMETRY, 218 TokenType.HLLSKETCH, 219 TokenType.HSTORE, 220 TokenType.PSEUDO_TYPE, 221 TokenType.SUPER, 222 TokenType.SERIAL, 223 TokenType.SMALLSERIAL, 224 TokenType.BIGSERIAL, 225 TokenType.XML, 226 TokenType.YEAR, 227 TokenType.UNIQUEIDENTIFIER, 228 TokenType.USERDEFINED, 229 TokenType.MONEY, 230 TokenType.SMALLMONEY, 231 TokenType.ROWVERSION, 232 TokenType.IMAGE, 233 TokenType.VARIANT, 234 TokenType.OBJECT, 235 TokenType.OBJECT_IDENTIFIER, 236 TokenType.INET, 237 TokenType.IPADDRESS, 238 TokenType.IPPREFIX, 239 TokenType.IPV4, 240 TokenType.IPV6, 241 TokenType.UNKNOWN, 242 TokenType.NULL, 243 *ENUM_TYPE_TOKENS, 244 *NESTED_TYPE_TOKENS, 245 } 246 247 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 248 TokenType.BIGINT: TokenType.UBIGINT, 249 TokenType.INT: TokenType.UINT, 250 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 251 TokenType.SMALLINT: TokenType.USMALLINT, 252 TokenType.TINYINT: TokenType.UTINYINT, 253 TokenType.DECIMAL: TokenType.UDECIMAL, 254 } 255 256 SUBQUERY_PREDICATES = { 257 TokenType.ANY: exp.Any, 258 TokenType.ALL: exp.All, 259 TokenType.EXISTS: exp.Exists, 260 TokenType.SOME: exp.Any, 261 } 262 263 RESERVED_TOKENS = { 264 *Tokenizer.SINGLE_TOKENS.values(), 265 TokenType.SELECT, 266 } 267 268 DB_CREATABLES = { 269 TokenType.DATABASE, 270 TokenType.SCHEMA, 271 TokenType.TABLE, 272 TokenType.VIEW, 273 TokenType.MODEL, 274 TokenType.DICTIONARY, 275 } 276 277 CREATABLES = { 278 TokenType.COLUMN, 279 TokenType.CONSTRAINT, 280 TokenType.FUNCTION, 281 TokenType.INDEX, 282 TokenType.PROCEDURE, 283 TokenType.FOREIGN_KEY, 284 *DB_CREATABLES, 285 } 286 287 # Tokens that can represent identifiers 288 ID_VAR_TOKENS = { 289 TokenType.VAR, 290 TokenType.ANTI, 291 TokenType.APPLY, 292 TokenType.ASC, 293 TokenType.AUTO_INCREMENT, 294 TokenType.BEGIN, 295 TokenType.CACHE, 296 TokenType.CASE, 297 TokenType.COLLATE, 298 TokenType.COMMAND, 299 TokenType.COMMENT, 300 TokenType.COMMIT, 301 TokenType.CONSTRAINT, 302 TokenType.DEFAULT, 303 TokenType.DELETE, 304 TokenType.DESC, 305 TokenType.DESCRIBE, 306 TokenType.DICTIONARY, 307 TokenType.DIV, 308 TokenType.END, 309 TokenType.EXECUTE, 310 TokenType.ESCAPE, 311 TokenType.FALSE, 312 TokenType.FIRST, 313 TokenType.FILTER, 314 TokenType.FINAL, 315 TokenType.FORMAT, 316 TokenType.FULL, 317 TokenType.IS, 318 TokenType.ISNULL, 319 TokenType.INTERVAL, 320 TokenType.KEEP, 321 TokenType.KILL, 322 TokenType.LEFT, 323 TokenType.LOAD, 324 TokenType.MERGE, 325 TokenType.NATURAL, 326 TokenType.NEXT, 327 TokenType.OFFSET, 328 TokenType.OPERATOR, 329 TokenType.ORDINALITY, 330 TokenType.OVERLAPS, 331 TokenType.OVERWRITE, 332 TokenType.PARTITION, 333 TokenType.PERCENT, 334 TokenType.PIVOT, 335 TokenType.PRAGMA, 336 TokenType.RANGE, 337 TokenType.RECURSIVE, 338 TokenType.REFERENCES, 339 TokenType.REFRESH, 340 TokenType.REPLACE, 341 TokenType.RIGHT, 342 TokenType.ROW, 343 TokenType.ROWS, 344 TokenType.SEMI, 345 TokenType.SET, 346 TokenType.SETTINGS, 347 TokenType.SHOW, 348 TokenType.TEMPORARY, 349 TokenType.TOP, 350 TokenType.TRUE, 351 TokenType.UNIQUE, 352 TokenType.UNPIVOT, 353 TokenType.UPDATE, 354 TokenType.USE, 355 TokenType.VOLATILE, 356 TokenType.WINDOW, 357 *CREATABLES, 358 *SUBQUERY_PREDICATES, 359 *TYPE_TOKENS, 360 *NO_PAREN_FUNCTIONS, 361 } 362 363 
INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 364 365 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 366 TokenType.ANTI, 367 TokenType.APPLY, 368 TokenType.ASOF, 369 TokenType.FULL, 370 TokenType.LEFT, 371 TokenType.LOCK, 372 TokenType.NATURAL, 373 TokenType.OFFSET, 374 TokenType.RIGHT, 375 TokenType.SEMI, 376 TokenType.WINDOW, 377 } 378 379 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 380 381 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 382 383 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 384 385 FUNC_TOKENS = { 386 TokenType.COLLATE, 387 TokenType.COMMAND, 388 TokenType.CURRENT_DATE, 389 TokenType.CURRENT_DATETIME, 390 TokenType.CURRENT_TIMESTAMP, 391 TokenType.CURRENT_TIME, 392 TokenType.CURRENT_USER, 393 TokenType.FILTER, 394 TokenType.FIRST, 395 TokenType.FORMAT, 396 TokenType.GLOB, 397 TokenType.IDENTIFIER, 398 TokenType.INDEX, 399 TokenType.ISNULL, 400 TokenType.ILIKE, 401 TokenType.INSERT, 402 TokenType.LIKE, 403 TokenType.MERGE, 404 TokenType.OFFSET, 405 TokenType.PRIMARY_KEY, 406 TokenType.RANGE, 407 TokenType.REPLACE, 408 TokenType.RLIKE, 409 TokenType.ROW, 410 TokenType.UNNEST, 411 TokenType.VAR, 412 TokenType.LEFT, 413 TokenType.RIGHT, 414 TokenType.DATE, 415 TokenType.DATETIME, 416 TokenType.TABLE, 417 TokenType.TIMESTAMP, 418 TokenType.TIMESTAMPTZ, 419 TokenType.WINDOW, 420 TokenType.XOR, 421 *TYPE_TOKENS, 422 *SUBQUERY_PREDICATES, 423 } 424 425 CONJUNCTION = { 426 TokenType.AND: exp.And, 427 TokenType.OR: exp.Or, 428 } 429 430 EQUALITY = { 431 TokenType.COLON_EQ: exp.PropertyEQ, 432 TokenType.EQ: exp.EQ, 433 TokenType.NEQ: exp.NEQ, 434 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 435 } 436 437 COMPARISON = { 438 TokenType.GT: exp.GT, 439 TokenType.GTE: exp.GTE, 440 TokenType.LT: exp.LT, 441 TokenType.LTE: exp.LTE, 442 } 443 444 BITWISE = { 445 TokenType.AMP: exp.BitwiseAnd, 446 TokenType.CARET: exp.BitwiseXor, 447 TokenType.PIPE: exp.BitwiseOr, 448 } 449 450 TERM = { 451 TokenType.DASH: exp.Sub, 452 TokenType.PLUS: exp.Add, 453 TokenType.MOD: exp.Mod, 454 TokenType.COLLATE: exp.Collate, 455 } 456 457 FACTOR = { 458 TokenType.DIV: exp.IntDiv, 459 TokenType.LR_ARROW: exp.Distance, 460 TokenType.SLASH: exp.Div, 461 TokenType.STAR: exp.Mul, 462 } 463 464 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 465 466 TIMES = { 467 TokenType.TIME, 468 TokenType.TIMETZ, 469 } 470 471 TIMESTAMPS = { 472 TokenType.TIMESTAMP, 473 TokenType.TIMESTAMPTZ, 474 TokenType.TIMESTAMPLTZ, 475 *TIMES, 476 } 477 478 SET_OPERATIONS = { 479 TokenType.UNION, 480 TokenType.INTERSECT, 481 TokenType.EXCEPT, 482 } 483 484 JOIN_METHODS = { 485 TokenType.NATURAL, 486 TokenType.ASOF, 487 } 488 489 JOIN_SIDES = { 490 TokenType.LEFT, 491 TokenType.RIGHT, 492 TokenType.FULL, 493 } 494 495 JOIN_KINDS = { 496 TokenType.INNER, 497 TokenType.OUTER, 498 TokenType.CROSS, 499 TokenType.SEMI, 500 TokenType.ANTI, 501 } 502 503 JOIN_HINTS: t.Set[str] = set() 504 505 LAMBDAS = { 506 TokenType.ARROW: lambda self, expressions: self.expression( 507 exp.Lambda, 508 this=self._replace_lambda( 509 self._parse_conjunction(), 510 {node.name for node in expressions}, 511 ), 512 expressions=expressions, 513 ), 514 TokenType.FARROW: lambda self, expressions: self.expression( 515 exp.Kwarg, 516 this=exp.var(expressions[0].name), 517 expression=self._parse_conjunction(), 518 ), 519 } 520 521 COLUMN_OPERATORS = { 522 TokenType.DOT: None, 523 TokenType.DCOLON: lambda self, this, to: self.expression( 524 exp.Cast if self.STRICT_CAST else exp.TryCast, 525 this=this, 526 to=to, 527 ), 528 TokenType.ARROW: lambda 
self, this, path: self.expression( 529 exp.JSONExtract, 530 this=this, 531 expression=path, 532 ), 533 TokenType.DARROW: lambda self, this, path: self.expression( 534 exp.JSONExtractScalar, 535 this=this, 536 expression=path, 537 ), 538 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 539 exp.JSONBExtract, 540 this=this, 541 expression=path, 542 ), 543 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 544 exp.JSONBExtractScalar, 545 this=this, 546 expression=path, 547 ), 548 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 549 exp.JSONBContains, 550 this=this, 551 expression=key, 552 ), 553 } 554 555 EXPRESSION_PARSERS = { 556 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 557 exp.Column: lambda self: self._parse_column(), 558 exp.Condition: lambda self: self._parse_conjunction(), 559 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 560 exp.Expression: lambda self: self._parse_statement(), 561 exp.From: lambda self: self._parse_from(), 562 exp.Group: lambda self: self._parse_group(), 563 exp.Having: lambda self: self._parse_having(), 564 exp.Identifier: lambda self: self._parse_id_var(), 565 exp.Join: lambda self: self._parse_join(), 566 exp.Lambda: lambda self: self._parse_lambda(), 567 exp.Lateral: lambda self: self._parse_lateral(), 568 exp.Limit: lambda self: self._parse_limit(), 569 exp.Offset: lambda self: self._parse_offset(), 570 exp.Order: lambda self: self._parse_order(), 571 exp.Ordered: lambda self: self._parse_ordered(), 572 exp.Properties: lambda self: self._parse_properties(), 573 exp.Qualify: lambda self: self._parse_qualify(), 574 exp.Returning: lambda self: self._parse_returning(), 575 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 576 exp.Table: lambda self: self._parse_table_parts(), 577 exp.TableAlias: lambda self: self._parse_table_alias(), 578 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 579 exp.Where: lambda self: self._parse_where(), 580 exp.Window: lambda self: self._parse_named_window(), 581 exp.With: lambda self: self._parse_with(), 582 "JOIN_TYPE": lambda self: self._parse_join_parts(), 583 } 584 585 STATEMENT_PARSERS = { 586 TokenType.ALTER: lambda self: self._parse_alter(), 587 TokenType.BEGIN: lambda self: self._parse_transaction(), 588 TokenType.CACHE: lambda self: self._parse_cache(), 589 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 590 TokenType.COMMENT: lambda self: self._parse_comment(), 591 TokenType.CREATE: lambda self: self._parse_create(), 592 TokenType.DELETE: lambda self: self._parse_delete(), 593 TokenType.DESC: lambda self: self._parse_describe(), 594 TokenType.DESCRIBE: lambda self: self._parse_describe(), 595 TokenType.DROP: lambda self: self._parse_drop(), 596 TokenType.INSERT: lambda self: self._parse_insert(), 597 TokenType.KILL: lambda self: self._parse_kill(), 598 TokenType.LOAD: lambda self: self._parse_load(), 599 TokenType.MERGE: lambda self: self._parse_merge(), 600 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 601 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 602 TokenType.REFRESH: lambda self: self._parse_refresh(), 603 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 604 TokenType.SET: lambda self: self._parse_set(), 605 TokenType.UNCACHE: lambda self: self._parse_uncache(), 606 TokenType.UPDATE: lambda self: self._parse_update(), 607 TokenType.USE: lambda self: self.expression( 608 exp.Use, 609 
kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 610 and exp.var(self._prev.text), 611 this=self._parse_table(schema=False), 612 ), 613 } 614 615 UNARY_PARSERS = { 616 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 617 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 618 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 619 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 620 } 621 622 PRIMARY_PARSERS = { 623 TokenType.STRING: lambda self, token: self.expression( 624 exp.Literal, this=token.text, is_string=True 625 ), 626 TokenType.NUMBER: lambda self, token: self.expression( 627 exp.Literal, this=token.text, is_string=False 628 ), 629 TokenType.STAR: lambda self, _: self.expression( 630 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 631 ), 632 TokenType.NULL: lambda self, _: self.expression(exp.Null), 633 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 634 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 635 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 636 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 637 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 638 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 639 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 640 exp.National, this=token.text 641 ), 642 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 643 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 644 exp.RawString, this=token.text 645 ), 646 TokenType.UNICODE_STRING: lambda self, token: self.expression( 647 exp.UnicodeString, 648 this=token.text, 649 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 650 ), 651 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 652 } 653 654 PLACEHOLDER_PARSERS = { 655 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 656 TokenType.PARAMETER: lambda self: self._parse_parameter(), 657 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 658 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 659 else None, 660 } 661 662 RANGE_PARSERS = { 663 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 664 TokenType.GLOB: binary_range_parser(exp.Glob), 665 TokenType.ILIKE: binary_range_parser(exp.ILike), 666 TokenType.IN: lambda self, this: self._parse_in(this), 667 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 668 TokenType.IS: lambda self, this: self._parse_is(this), 669 TokenType.LIKE: binary_range_parser(exp.Like), 670 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 671 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 672 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 673 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 674 } 675 676 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 677 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 678 "AUTO": lambda self: self._parse_auto_property(), 679 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 680 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 681 "CHARSET": lambda self, **kwargs: 
self._parse_character_set(**kwargs), 682 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 683 "CHECKSUM": lambda self: self._parse_checksum(), 684 "CLUSTER BY": lambda self: self._parse_cluster(), 685 "CLUSTERED": lambda self: self._parse_clustered_by(), 686 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 687 exp.CollateProperty, **kwargs 688 ), 689 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 690 "CONTAINS": lambda self: self._parse_contains_property(), 691 "COPY": lambda self: self._parse_copy_property(), 692 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 693 "DEFINER": lambda self: self._parse_definer(), 694 "DETERMINISTIC": lambda self: self.expression( 695 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 696 ), 697 "DISTKEY": lambda self: self._parse_distkey(), 698 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 699 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 700 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 701 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 702 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 703 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 704 "FREESPACE": lambda self: self._parse_freespace(), 705 "HEAP": lambda self: self.expression(exp.HeapProperty), 706 "IMMUTABLE": lambda self: self.expression( 707 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 708 ), 709 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 710 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 711 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 712 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 713 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 714 "LIKE": lambda self: self._parse_create_like(), 715 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 716 "LOCK": lambda self: self._parse_locking(), 717 "LOCKING": lambda self: self._parse_locking(), 718 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 719 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 720 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 721 "MODIFIES": lambda self: self._parse_modifies_property(), 722 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 723 "NO": lambda self: self._parse_no_property(), 724 "ON": lambda self: self._parse_on_property(), 725 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 726 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 727 "PARTITION": lambda self: self._parse_partitioned_of(), 728 "PARTITION BY": lambda self: self._parse_partitioned_by(), 729 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 730 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 731 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 732 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 733 "READS": lambda self: self._parse_reads_property(), 734 "REMOTE": lambda self: self._parse_remote_with_connection(), 735 "RETURNS": lambda self: self._parse_returns(), 736 "ROW": lambda self: self._parse_row(), 737 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 738 
"SAMPLE": lambda self: self.expression( 739 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 740 ), 741 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 742 "SETTINGS": lambda self: self.expression( 743 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 744 ), 745 "SORTKEY": lambda self: self._parse_sortkey(), 746 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 747 "STABLE": lambda self: self.expression( 748 exp.StabilityProperty, this=exp.Literal.string("STABLE") 749 ), 750 "STORED": lambda self: self._parse_stored(), 751 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 752 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 753 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 754 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 755 "TO": lambda self: self._parse_to_table(), 756 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 757 "TRANSFORM": lambda self: self.expression( 758 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 759 ), 760 "TTL": lambda self: self._parse_ttl(), 761 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 762 "VOLATILE": lambda self: self._parse_volatile_property(), 763 "WITH": lambda self: self._parse_with_property(), 764 } 765 766 CONSTRAINT_PARSERS = { 767 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 768 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 769 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 770 "CHARACTER SET": lambda self: self.expression( 771 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 772 ), 773 "CHECK": lambda self: self.expression( 774 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 775 ), 776 "COLLATE": lambda self: self.expression( 777 exp.CollateColumnConstraint, this=self._parse_var() 778 ), 779 "COMMENT": lambda self: self.expression( 780 exp.CommentColumnConstraint, this=self._parse_string() 781 ), 782 "COMPRESS": lambda self: self._parse_compress(), 783 "CLUSTERED": lambda self: self.expression( 784 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 785 ), 786 "NONCLUSTERED": lambda self: self.expression( 787 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 788 ), 789 "DEFAULT": lambda self: self.expression( 790 exp.DefaultColumnConstraint, this=self._parse_bitwise() 791 ), 792 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 793 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 794 "FORMAT": lambda self: self.expression( 795 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 796 ), 797 "GENERATED": lambda self: self._parse_generated_as_identity(), 798 "IDENTITY": lambda self: self._parse_auto_increment(), 799 "INLINE": lambda self: self._parse_inline(), 800 "LIKE": lambda self: self._parse_create_like(), 801 "NOT": lambda self: self._parse_not_constraint(), 802 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 803 "ON": lambda self: ( 804 self._match(TokenType.UPDATE) 805 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 806 ) 807 or self.expression(exp.OnProperty, this=self._parse_id_var()), 808 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 
809 "PERIOD": lambda self: self._parse_period_for_system_time(), 810 "PRIMARY KEY": lambda self: self._parse_primary_key(), 811 "REFERENCES": lambda self: self._parse_references(match=False), 812 "TITLE": lambda self: self.expression( 813 exp.TitleColumnConstraint, this=self._parse_var_or_string() 814 ), 815 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 816 "UNIQUE": lambda self: self._parse_unique(), 817 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 818 "WITH": lambda self: self.expression( 819 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 820 ), 821 } 822 823 ALTER_PARSERS = { 824 "ADD": lambda self: self._parse_alter_table_add(), 825 "ALTER": lambda self: self._parse_alter_table_alter(), 826 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 827 "DROP": lambda self: self._parse_alter_table_drop(), 828 "RENAME": lambda self: self._parse_alter_table_rename(), 829 } 830 831 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"} 832 833 NO_PAREN_FUNCTION_PARSERS = { 834 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 835 "CASE": lambda self: self._parse_case(), 836 "IF": lambda self: self._parse_if(), 837 "NEXT": lambda self: self._parse_next_value_for(), 838 } 839 840 INVALID_FUNC_NAME_TOKENS = { 841 TokenType.IDENTIFIER, 842 TokenType.STRING, 843 } 844 845 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 846 847 FUNCTION_PARSERS = { 848 "ANY_VALUE": lambda self: self._parse_any_value(), 849 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 850 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 851 "DECODE": lambda self: self._parse_decode(), 852 "EXTRACT": lambda self: self._parse_extract(), 853 "JSON_OBJECT": lambda self: self._parse_json_object(), 854 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 855 "JSON_TABLE": lambda self: self._parse_json_table(), 856 "MATCH": lambda self: self._parse_match_against(), 857 "OPENJSON": lambda self: self._parse_open_json(), 858 "POSITION": lambda self: self._parse_position(), 859 "PREDICT": lambda self: self._parse_predict(), 860 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 861 "STRING_AGG": lambda self: self._parse_string_agg(), 862 "SUBSTRING": lambda self: self._parse_substring(), 863 "TRIM": lambda self: self._parse_trim(), 864 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 865 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 866 } 867 868 QUERY_MODIFIER_PARSERS = { 869 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 870 TokenType.WHERE: lambda self: ("where", self._parse_where()), 871 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 872 TokenType.HAVING: lambda self: ("having", self._parse_having()), 873 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 874 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 875 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 876 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 877 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 878 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 879 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 880 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 881 TokenType.TABLE_SAMPLE: lambda self: ("sample", 
self._parse_table_sample(as_modifier=True)), 882 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 883 TokenType.CLUSTER_BY: lambda self: ( 884 "cluster", 885 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 886 ), 887 TokenType.DISTRIBUTE_BY: lambda self: ( 888 "distribute", 889 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 890 ), 891 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 892 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 893 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 894 } 895 896 SET_PARSERS = { 897 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 898 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 899 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 900 "TRANSACTION": lambda self: self._parse_set_transaction(), 901 } 902 903 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 904 905 TYPE_LITERAL_PARSERS = { 906 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 907 } 908 909 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 910 911 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 912 913 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 914 915 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 916 TRANSACTION_CHARACTERISTICS = { 917 "ISOLATION LEVEL REPEATABLE READ", 918 "ISOLATION LEVEL READ COMMITTED", 919 "ISOLATION LEVEL READ UNCOMMITTED", 920 "ISOLATION LEVEL SERIALIZABLE", 921 "READ WRITE", 922 "READ ONLY", 923 } 924 925 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 926 927 CLONE_KEYWORDS = {"CLONE", "COPY"} 928 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 929 930 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 931 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 932 933 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 934 935 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 936 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 937 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 938 939 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 940 941 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 942 943 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 944 945 DISTINCT_TOKENS = {TokenType.DISTINCT} 946 947 NULL_TOKENS = {TokenType.NULL} 948 949 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 950 951 STRICT_CAST = True 952 953 PREFIXED_PIVOT_COLUMNS = False 954 IDENTIFY_PIVOT_STRINGS = False 955 956 LOG_DEFAULTS_TO_LN = False 957 958 # Whether or not ADD is present for each column added by ALTER TABLE 959 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 960 961 # Whether or not the table sample clause expects CSV syntax 962 TABLESAMPLE_CSV = False 963 964 # Whether or not the SET command needs a delimiter (e.g. 
"=") for assignments 965 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 966 967 # Whether the TRIM function expects the characters to trim as its first argument 968 TRIM_PATTERN_FIRST = False 969 970 # Whether or not string aliases are supported `SELECT COUNT(*) 'count'` 971 STRING_ALIASES = False 972 973 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 974 MODIFIERS_ATTACHED_TO_UNION = True 975 UNION_MODIFIERS = {"order", "limit", "offset"} 976 977 __slots__ = ( 978 "error_level", 979 "error_message_context", 980 "max_errors", 981 "dialect", 982 "sql", 983 "errors", 984 "_tokens", 985 "_index", 986 "_curr", 987 "_next", 988 "_prev", 989 "_prev_comments", 990 ) 991 992 # Autofilled 993 SHOW_TRIE: t.Dict = {} 994 SET_TRIE: t.Dict = {} 995 996 def __init__( 997 self, 998 error_level: t.Optional[ErrorLevel] = None, 999 error_message_context: int = 100, 1000 max_errors: int = 3, 1001 dialect: DialectType = None, 1002 ): 1003 from sqlglot.dialects import Dialect 1004 1005 self.error_level = error_level or ErrorLevel.IMMEDIATE 1006 self.error_message_context = error_message_context 1007 self.max_errors = max_errors 1008 self.dialect = Dialect.get_or_raise(dialect) 1009 self.reset() 1010 1011 def reset(self): 1012 self.sql = "" 1013 self.errors = [] 1014 self._tokens = [] 1015 self._index = 0 1016 self._curr = None 1017 self._next = None 1018 self._prev = None 1019 self._prev_comments = None 1020 1021 def parse( 1022 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1023 ) -> t.List[t.Optional[exp.Expression]]: 1024 """ 1025 Parses a list of tokens and returns a list of syntax trees, one tree 1026 per parsed SQL statement. 1027 1028 Args: 1029 raw_tokens: The list of tokens. 1030 sql: The original SQL string, used to produce helpful debug messages. 1031 1032 Returns: 1033 The list of the produced syntax trees. 1034 """ 1035 return self._parse( 1036 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1037 ) 1038 1039 def parse_into( 1040 self, 1041 expression_types: exp.IntoType, 1042 raw_tokens: t.List[Token], 1043 sql: t.Optional[str] = None, 1044 ) -> t.List[t.Optional[exp.Expression]]: 1045 """ 1046 Parses a list of tokens into a given Expression type. If a collection of Expression 1047 types is given instead, this method will try to parse the token list into each one 1048 of them, stopping at the first for which the parsing succeeds. 1049 1050 Args: 1051 expression_types: The expression type(s) to try and parse the token list into. 1052 raw_tokens: The list of tokens. 1053 sql: The original SQL string, used to produce helpful debug messages. 1054 1055 Returns: 1056 The target Expression. 
1057 """ 1058 errors = [] 1059 for expression_type in ensure_list(expression_types): 1060 parser = self.EXPRESSION_PARSERS.get(expression_type) 1061 if not parser: 1062 raise TypeError(f"No parser registered for {expression_type}") 1063 1064 try: 1065 return self._parse(parser, raw_tokens, sql) 1066 except ParseError as e: 1067 e.errors[0]["into_expression"] = expression_type 1068 errors.append(e) 1069 1070 raise ParseError( 1071 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1072 errors=merge_errors(errors), 1073 ) from errors[-1] 1074 1075 def _parse( 1076 self, 1077 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1078 raw_tokens: t.List[Token], 1079 sql: t.Optional[str] = None, 1080 ) -> t.List[t.Optional[exp.Expression]]: 1081 self.reset() 1082 self.sql = sql or "" 1083 1084 total = len(raw_tokens) 1085 chunks: t.List[t.List[Token]] = [[]] 1086 1087 for i, token in enumerate(raw_tokens): 1088 if token.token_type == TokenType.SEMICOLON: 1089 if i < total - 1: 1090 chunks.append([]) 1091 else: 1092 chunks[-1].append(token) 1093 1094 expressions = [] 1095 1096 for tokens in chunks: 1097 self._index = -1 1098 self._tokens = tokens 1099 self._advance() 1100 1101 expressions.append(parse_method(self)) 1102 1103 if self._index < len(self._tokens): 1104 self.raise_error("Invalid expression / Unexpected token") 1105 1106 self.check_errors() 1107 1108 return expressions 1109 1110 def check_errors(self) -> None: 1111 """Logs or raises any found errors, depending on the chosen error level setting.""" 1112 if self.error_level == ErrorLevel.WARN: 1113 for error in self.errors: 1114 logger.error(str(error)) 1115 elif self.error_level == ErrorLevel.RAISE and self.errors: 1116 raise ParseError( 1117 concat_messages(self.errors, self.max_errors), 1118 errors=merge_errors(self.errors), 1119 ) 1120 1121 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1122 """ 1123 Appends an error in the list of recorded errors or raises it, depending on the chosen 1124 error level setting. 1125 """ 1126 token = token or self._curr or self._prev or Token.string("") 1127 start = token.start 1128 end = token.end + 1 1129 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1130 highlight = self.sql[start:end] 1131 end_context = self.sql[end : end + self.error_message_context] 1132 1133 error = ParseError.new( 1134 f"{message}. Line {token.line}, Col: {token.col}.\n" 1135 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1136 description=message, 1137 line=token.line, 1138 col=token.col, 1139 start_context=start_context, 1140 highlight=highlight, 1141 end_context=end_context, 1142 ) 1143 1144 if self.error_level == ErrorLevel.IMMEDIATE: 1145 raise error 1146 1147 self.errors.append(error) 1148 1149 def expression( 1150 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1151 ) -> E: 1152 """ 1153 Creates a new, validated Expression. 1154 1155 Args: 1156 exp_class: The expression class to instantiate. 1157 comments: An optional list of comments to attach to the expression. 1158 kwargs: The arguments to set for the expression along with their respective values. 1159 1160 Returns: 1161 The target expression. 
1162 """ 1163 instance = exp_class(**kwargs) 1164 instance.add_comments(comments) if comments else self._add_comments(instance) 1165 return self.validate_expression(instance) 1166 1167 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1168 if expression and self._prev_comments: 1169 expression.add_comments(self._prev_comments) 1170 self._prev_comments = None 1171 1172 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1173 """ 1174 Validates an Expression, making sure that all its mandatory arguments are set. 1175 1176 Args: 1177 expression: The expression to validate. 1178 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1179 1180 Returns: 1181 The validated expression. 1182 """ 1183 if self.error_level != ErrorLevel.IGNORE: 1184 for error_message in expression.error_messages(args): 1185 self.raise_error(error_message) 1186 1187 return expression 1188 1189 def _find_sql(self, start: Token, end: Token) -> str: 1190 return self.sql[start.start : end.end + 1] 1191 1192 def _is_connected(self) -> bool: 1193 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1194 1195 def _advance(self, times: int = 1) -> None: 1196 self._index += times 1197 self._curr = seq_get(self._tokens, self._index) 1198 self._next = seq_get(self._tokens, self._index + 1) 1199 1200 if self._index > 0: 1201 self._prev = self._tokens[self._index - 1] 1202 self._prev_comments = self._prev.comments 1203 else: 1204 self._prev = None 1205 self._prev_comments = None 1206 1207 def _retreat(self, index: int) -> None: 1208 if index != self._index: 1209 self._advance(index - self._index) 1210 1211 def _parse_command(self) -> exp.Command: 1212 return self.expression( 1213 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1214 ) 1215 1216 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1217 start = self._prev 1218 exists = self._parse_exists() if allow_exists else None 1219 1220 self._match(TokenType.ON) 1221 1222 kind = self._match_set(self.CREATABLES) and self._prev 1223 if not kind: 1224 return self._parse_as_command(start) 1225 1226 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1227 this = self._parse_user_defined_function(kind=kind.token_type) 1228 elif kind.token_type == TokenType.TABLE: 1229 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1230 elif kind.token_type == TokenType.COLUMN: 1231 this = self._parse_column() 1232 else: 1233 this = self._parse_id_var() 1234 1235 self._match(TokenType.IS) 1236 1237 return self.expression( 1238 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1239 ) 1240 1241 def _parse_to_table( 1242 self, 1243 ) -> exp.ToTableProperty: 1244 table = self._parse_table_parts(schema=True) 1245 return self.expression(exp.ToTableProperty, this=table) 1246 1247 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1248 def _parse_ttl(self) -> exp.Expression: 1249 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1250 this = self._parse_bitwise() 1251 1252 if self._match_text_seq("DELETE"): 1253 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1254 if self._match_text_seq("RECOMPRESS"): 1255 return self.expression( 1256 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1257 ) 1258 if self._match_text_seq("TO", "DISK"): 1259 return self.expression( 1260 
exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1261 ) 1262 if self._match_text_seq("TO", "VOLUME"): 1263 return self.expression( 1264 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1265 ) 1266 1267 return this 1268 1269 expressions = self._parse_csv(_parse_ttl_action) 1270 where = self._parse_where() 1271 group = self._parse_group() 1272 1273 aggregates = None 1274 if group and self._match(TokenType.SET): 1275 aggregates = self._parse_csv(self._parse_set_item) 1276 1277 return self.expression( 1278 exp.MergeTreeTTL, 1279 expressions=expressions, 1280 where=where, 1281 group=group, 1282 aggregates=aggregates, 1283 ) 1284 1285 def _parse_statement(self) -> t.Optional[exp.Expression]: 1286 if self._curr is None: 1287 return None 1288 1289 if self._match_set(self.STATEMENT_PARSERS): 1290 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1291 1292 if self._match_set(Tokenizer.COMMANDS): 1293 return self._parse_command() 1294 1295 expression = self._parse_expression() 1296 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1297 return self._parse_query_modifiers(expression) 1298 1299 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1300 start = self._prev 1301 temporary = self._match(TokenType.TEMPORARY) 1302 materialized = self._match_text_seq("MATERIALIZED") 1303 1304 kind = self._match_set(self.CREATABLES) and self._prev.text 1305 if not kind: 1306 return self._parse_as_command(start) 1307 1308 return self.expression( 1309 exp.Drop, 1310 comments=start.comments, 1311 exists=exists or self._parse_exists(), 1312 this=self._parse_table(schema=True), 1313 kind=kind, 1314 temporary=temporary, 1315 materialized=materialized, 1316 cascade=self._match_text_seq("CASCADE"), 1317 constraints=self._match_text_seq("CONSTRAINTS"), 1318 purge=self._match_text_seq("PURGE"), 1319 ) 1320 1321 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1322 return ( 1323 self._match_text_seq("IF") 1324 and (not not_ or self._match(TokenType.NOT)) 1325 and self._match(TokenType.EXISTS) 1326 ) 1327 1328 def _parse_create(self) -> exp.Create | exp.Command: 1329 # Note: this can't be None because we've matched a statement parser 1330 start = self._prev 1331 comments = self._prev_comments 1332 1333 replace = start.text.upper() == "REPLACE" or self._match_pair( 1334 TokenType.OR, TokenType.REPLACE 1335 ) 1336 unique = self._match(TokenType.UNIQUE) 1337 1338 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1339 self._advance() 1340 1341 properties = None 1342 create_token = self._match_set(self.CREATABLES) and self._prev 1343 1344 if not create_token: 1345 # exp.Properties.Location.POST_CREATE 1346 properties = self._parse_properties() 1347 create_token = self._match_set(self.CREATABLES) and self._prev 1348 1349 if not properties or not create_token: 1350 return self._parse_as_command(start) 1351 1352 exists = self._parse_exists(not_=True) 1353 this = None 1354 expression: t.Optional[exp.Expression] = None 1355 indexes = None 1356 no_schema_binding = None 1357 begin = None 1358 end = None 1359 clone = None 1360 1361 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1362 nonlocal properties 1363 if properties and temp_props: 1364 properties.expressions.extend(temp_props.expressions) 1365 elif temp_props: 1366 properties = temp_props 1367 1368 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1369 this = 
    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )
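    # Hedged sketch of the Teradata shape handled by _parse_property_before
    # below (the method notes it is Teradata-only): options may follow the
    # table name as comma-separated, pre-schema properties, e.g.
    #
    #   CREATE TABLE t, NO FALLBACK, DUAL BEFORE JOURNAL, MINIMUM DATABLOCKSIZE (x INT)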
"MINIMUM")), 1474 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1475 } 1476 1477 if self._match_texts(self.PROPERTY_PARSERS): 1478 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1479 try: 1480 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1481 except TypeError: 1482 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1483 1484 return None 1485 1486 def _parse_property(self) -> t.Optional[exp.Expression]: 1487 if self._match_texts(self.PROPERTY_PARSERS): 1488 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1489 1490 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1491 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1492 1493 if self._match_text_seq("COMPOUND", "SORTKEY"): 1494 return self._parse_sortkey(compound=True) 1495 1496 if self._match_text_seq("SQL", "SECURITY"): 1497 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1498 1499 index = self._index 1500 key = self._parse_column() 1501 1502 if not self._match(TokenType.EQ): 1503 self._retreat(index) 1504 return None 1505 1506 return self.expression( 1507 exp.Property, 1508 this=key.to_dot() if isinstance(key, exp.Column) else key, 1509 value=self._parse_column() or self._parse_var(any_token=True), 1510 ) 1511 1512 def _parse_stored(self) -> exp.FileFormatProperty: 1513 self._match(TokenType.ALIAS) 1514 1515 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1516 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1517 1518 return self.expression( 1519 exp.FileFormatProperty, 1520 this=self.expression( 1521 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1522 ) 1523 if input_format or output_format 1524 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1525 ) 1526 1527 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1528 self._match(TokenType.EQ) 1529 self._match(TokenType.ALIAS) 1530 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1531 1532 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1533 properties = [] 1534 while True: 1535 if before: 1536 prop = self._parse_property_before() 1537 else: 1538 prop = self._parse_property() 1539 1540 if not prop: 1541 break 1542 for p in ensure_list(prop): 1543 properties.append(p) 1544 1545 if properties: 1546 return self.expression(exp.Properties, expressions=properties) 1547 1548 return None 1549 1550 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1551 return self.expression( 1552 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1553 ) 1554 1555 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1556 if self._index >= 2: 1557 pre_volatile_token = self._tokens[self._index - 2] 1558 else: 1559 pre_volatile_token = None 1560 1561 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1562 return exp.VolatileProperty() 1563 1564 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1565 1566 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1567 self._match_pair(TokenType.EQ, TokenType.ON) 1568 1569 prop = self.expression(exp.WithSystemVersioningProperty) 1570 if self._match(TokenType.L_PAREN): 1571 self._match_text_seq("HISTORY_TABLE", "=") 1572 
prop.set("this", self._parse_table_parts()) 1573 1574 if self._match(TokenType.COMMA): 1575 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1576 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1577 1578 self._match_r_paren() 1579 1580 return prop 1581 1582 def _parse_with_property( 1583 self, 1584 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1585 if self._match(TokenType.L_PAREN, advance=False): 1586 return self._parse_wrapped_csv(self._parse_property) 1587 1588 if self._match_text_seq("JOURNAL"): 1589 return self._parse_withjournaltable() 1590 1591 if self._match_text_seq("DATA"): 1592 return self._parse_withdata(no=False) 1593 elif self._match_text_seq("NO", "DATA"): 1594 return self._parse_withdata(no=True) 1595 1596 if not self._next: 1597 return None 1598 1599 return self._parse_withisolatedloading() 1600 1601 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1602 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1603 self._match(TokenType.EQ) 1604 1605 user = self._parse_id_var() 1606 self._match(TokenType.PARAMETER) 1607 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1608 1609 if not user or not host: 1610 return None 1611 1612 return exp.DefinerProperty(this=f"{user}@{host}") 1613 1614 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1615 self._match(TokenType.TABLE) 1616 self._match(TokenType.EQ) 1617 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1618 1619 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1620 return self.expression(exp.LogProperty, no=no) 1621 1622 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1623 return self.expression(exp.JournalProperty, **kwargs) 1624 1625 def _parse_checksum(self) -> exp.ChecksumProperty: 1626 self._match(TokenType.EQ) 1627 1628 on = None 1629 if self._match(TokenType.ON): 1630 on = True 1631 elif self._match_text_seq("OFF"): 1632 on = False 1633 1634 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1635 1636 def _parse_cluster(self) -> exp.Cluster: 1637 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1638 1639 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1640 self._match_text_seq("BY") 1641 1642 self._match_l_paren() 1643 expressions = self._parse_csv(self._parse_column) 1644 self._match_r_paren() 1645 1646 if self._match_text_seq("SORTED", "BY"): 1647 self._match_l_paren() 1648 sorted_by = self._parse_csv(self._parse_ordered) 1649 self._match_r_paren() 1650 else: 1651 sorted_by = None 1652 1653 self._match(TokenType.INTO) 1654 buckets = self._parse_number() 1655 self._match_text_seq("BUCKETS") 1656 1657 return self.expression( 1658 exp.ClusteredByProperty, 1659 expressions=expressions, 1660 sorted_by=sorted_by, 1661 buckets=buckets, 1662 ) 1663 1664 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1665 if not self._match_text_seq("GRANTS"): 1666 self._retreat(self._index - 1) 1667 return None 1668 1669 return self.expression(exp.CopyGrantsProperty) 1670 1671 def _parse_freespace(self) -> exp.FreespaceProperty: 1672 self._match(TokenType.EQ) 1673 return self.expression( 1674 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1675 ) 1676 1677 def _parse_mergeblockratio( 1678 self, no: bool = False, default: bool = False 1679 ) -> exp.MergeBlockRatioProperty: 1680 if self._match(TokenType.EQ): 1681 return self.expression( 
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []
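    # For reference (PostgreSQL partition-bound syntax; added sketch): the
    # spec parser below accepts forms such as
    #
    #   FOR VALUES IN ('a', 'b')
    #   FOR VALUES FROM (MINVALUE) TO (10)
    #   FOR VALUES WITH (MODULUS 4, REMAINDER 0)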
exp.var("MINVALUE") 1806 if self._match_text_seq("MAXVALUE"): 1807 return exp.var("MAXVALUE") 1808 return self._parse_bitwise() 1809 1810 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 1811 expression = None 1812 from_expressions = None 1813 to_expressions = None 1814 1815 if self._match(TokenType.IN): 1816 this = self._parse_wrapped_csv(self._parse_bitwise) 1817 elif self._match(TokenType.FROM): 1818 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1819 self._match_text_seq("TO") 1820 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1821 elif self._match_text_seq("WITH", "(", "MODULUS"): 1822 this = self._parse_number() 1823 self._match_text_seq(",", "REMAINDER") 1824 expression = self._parse_number() 1825 self._match_r_paren() 1826 else: 1827 self.raise_error("Failed to parse partition bound spec.") 1828 1829 return self.expression( 1830 exp.PartitionBoundSpec, 1831 this=this, 1832 expression=expression, 1833 from_expressions=from_expressions, 1834 to_expressions=to_expressions, 1835 ) 1836 1837 # https://www.postgresql.org/docs/current/sql-createtable.html 1838 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 1839 if not self._match_text_seq("OF"): 1840 self._retreat(self._index - 1) 1841 return None 1842 1843 this = self._parse_table(schema=True) 1844 1845 if self._match(TokenType.DEFAULT): 1846 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 1847 elif self._match_text_seq("FOR", "VALUES"): 1848 expression = self._parse_partition_bound_spec() 1849 else: 1850 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 1851 1852 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 1853 1854 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1855 self._match(TokenType.EQ) 1856 return self.expression( 1857 exp.PartitionedByProperty, 1858 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1859 ) 1860 1861 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1862 if self._match_text_seq("AND", "STATISTICS"): 1863 statistics = True 1864 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1865 statistics = False 1866 else: 1867 statistics = None 1868 1869 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1870 1871 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1872 if self._match_text_seq("SQL"): 1873 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 1874 return None 1875 1876 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1877 if self._match_text_seq("SQL", "DATA"): 1878 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 1879 return None 1880 1881 def _parse_no_property(self) -> t.Optional[exp.Expression]: 1882 if self._match_text_seq("PRIMARY", "INDEX"): 1883 return exp.NoPrimaryIndexProperty() 1884 if self._match_text_seq("SQL"): 1885 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 1886 return None 1887 1888 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1889 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1890 return exp.OnCommitProperty() 1891 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1892 return exp.OnCommitProperty(delete=True) 1893 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1894 1895 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1896 if 
self._match_text_seq("SQL", "DATA"): 1897 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 1898 return None 1899 1900 def _parse_distkey(self) -> exp.DistKeyProperty: 1901 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1902 1903 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1904 table = self._parse_table(schema=True) 1905 1906 options = [] 1907 while self._match_texts(("INCLUDING", "EXCLUDING")): 1908 this = self._prev.text.upper() 1909 1910 id_var = self._parse_id_var() 1911 if not id_var: 1912 return None 1913 1914 options.append( 1915 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1916 ) 1917 1918 return self.expression(exp.LikeProperty, this=table, expressions=options) 1919 1920 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1921 return self.expression( 1922 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1923 ) 1924 1925 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1926 self._match(TokenType.EQ) 1927 return self.expression( 1928 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1929 ) 1930 1931 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 1932 self._match_text_seq("WITH", "CONNECTION") 1933 return self.expression( 1934 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 1935 ) 1936 1937 def _parse_returns(self) -> exp.ReturnsProperty: 1938 value: t.Optional[exp.Expression] 1939 is_table = self._match(TokenType.TABLE) 1940 1941 if is_table: 1942 if self._match(TokenType.LT): 1943 value = self.expression( 1944 exp.Schema, 1945 this="TABLE", 1946 expressions=self._parse_csv(self._parse_struct_types), 1947 ) 1948 if not self._match(TokenType.GT): 1949 self.raise_error("Expecting >") 1950 else: 1951 value = self._parse_schema(exp.var("TABLE")) 1952 else: 1953 value = self._parse_types() 1954 1955 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1956 1957 def _parse_describe(self) -> exp.Describe: 1958 kind = self._match_set(self.CREATABLES) and self._prev.text 1959 extended = self._match_text_seq("EXTENDED") 1960 this = self._parse_table(schema=True) 1961 properties = self._parse_properties() 1962 expressions = properties.expressions if properties else None 1963 return self.expression( 1964 exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions 1965 ) 1966 1967 def _parse_insert(self) -> exp.Insert: 1968 comments = ensure_list(self._prev_comments) 1969 overwrite = self._match(TokenType.OVERWRITE) 1970 ignore = self._match(TokenType.IGNORE) 1971 local = self._match_text_seq("LOCAL") 1972 alternative = None 1973 1974 if self._match_text_seq("DIRECTORY"): 1975 this: t.Optional[exp.Expression] = self.expression( 1976 exp.Directory, 1977 this=self._parse_var_or_string(), 1978 local=local, 1979 row_format=self._parse_row_format(match_row=True), 1980 ) 1981 else: 1982 if self._match(TokenType.OR): 1983 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1984 1985 self._match(TokenType.INTO) 1986 comments += ensure_list(self._prev_comments) 1987 self._match(TokenType.TABLE) 1988 this = self._parse_table(schema=True) 1989 1990 returning = self._parse_returning() 1991 1992 return self.expression( 1993 exp.Insert, 1994 comments=comments, 1995 this=this, 1996 by_name=self._match_text_seq("BY", "NAME"), 1997 exists=self._parse_exists(), 1998 
    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
        if self._match_text_seq("ESCAPED", "BY"):
            kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
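    # Hive-style sketch of what _parse_row_format above accepts (added,
    # mirrors the keyword sequences matched by the method):
    #
    #   ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' ESCAPED BY '\\'
    #   COLLECTION ITEMS TERMINATED BY '|' MAP KEYS TERMINATED BY ':'
    #   LINES TERMINATED BY '\n' NULL DEFINED AS 'NULL'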
    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_expression()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)

        return this
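    # Usage sketch (added; illustrative): _parse_select is normally reached
    # through the public API, which tokenizes first and then dispatches here,
    # e.g.
    #
    #   import sqlglot
    #   sqlglot.parse_one("WITH x AS (SELECT 1 AS a) SELECT a FROM x")
    #
    # The WITH clause is produced by _parse_with/_parse_cte below and attached
    # to the parsed statement via this.set("with", cte).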
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                this.set("offset", exp.Offset(expression=offset))

                        continue
                break

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
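    # Representative MATCH_RECOGNIZE input for the parser above (added
    # sketch; standard row-pattern syntax):
    #
    #   SELECT * FROM t MATCH_RECOGNIZE (
    #     PARTITION BY id ORDER BY ts
    #     MEASURES FIRST(ts) AS start_ts
    #     ONE ROW PER MATCH
    #     AFTER MATCH SKIP PAST LAST ROW
    #     PATTERN (A B*)
    #     DEFINE B AS price < PREV(price)
    #   ) AS m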
    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_conjunction()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []

        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema) or ""

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
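    # Added example: for "catalog.db.tbl", _parse_table_parts above produces
    # exp.Table(this=tbl, db=db, catalog=catalog); any further dotted parts
    # are nested as exp.Dot expressions.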
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        this = t.cast(
            exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema))
        )

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)
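    # Representative UNNEST inputs for the parser above (added sketch;
    # BigQuery/Presto style):
    #
    #   SELECT * FROM UNNEST([1, 2]) AS x WITH OFFSET AS pos   -- BigQuery
    #   SELECT * FROM UNNEST(ARRAY[1, 2]) AS t (a)             -- Presto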
    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()

        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_conjunction()

            self._match(TokenType.ALIAS)
            alias = self._parse_field()
            if alias:
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")
        aliased_expressions = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=aliased_expressions)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )
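    # GROUP BY shapes handled by _parse_group below (added sketch):
    #
    #   GROUP BY a, b
    #   GROUP BY ROLLUP (a, b), CUBE (c)
    #   GROUP BY GROUPING SETS ((a), (a, b), ())
    #   GROUP BY a WITH ROLLUP    -- MySQL
    #   GROUP BY ALL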
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_name_as_expression(self) -> exp.Alias:
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
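    # Added note: _parse_ordered below also derives implicit null ordering.
    # When neither NULLS FIRST nor NULLS LAST is written, the dialect's
    # NULL_ORDERING setting ("nulls_are_small", "nulls_are_large" or
    # "nulls_are_last") determines whether nulls_first is set on the
    # resulting exp.Ordered.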
self._match_text_seq("NULLS", "FIRST") 3216 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3217 3218 nulls_first = is_nulls_first or False 3219 explicitly_null_ordered = is_nulls_first or is_nulls_last 3220 3221 if ( 3222 not explicitly_null_ordered 3223 and ( 3224 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3225 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3226 ) 3227 and self.dialect.NULL_ORDERING != "nulls_are_last" 3228 ): 3229 nulls_first = True 3230 3231 if self._match_text_seq("WITH", "FILL"): 3232 with_fill = self.expression( 3233 exp.WithFill, 3234 **{ # type: ignore 3235 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3236 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3237 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3238 }, 3239 ) 3240 else: 3241 with_fill = None 3242 3243 return self.expression( 3244 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3245 ) 3246 3247 def _parse_limit( 3248 self, this: t.Optional[exp.Expression] = None, top: bool = False 3249 ) -> t.Optional[exp.Expression]: 3250 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3251 comments = self._prev_comments 3252 if top: 3253 limit_paren = self._match(TokenType.L_PAREN) 3254 expression = self._parse_term() if limit_paren else self._parse_number() 3255 3256 if limit_paren: 3257 self._match_r_paren() 3258 else: 3259 expression = self._parse_term() 3260 3261 if self._match(TokenType.COMMA): 3262 offset = expression 3263 expression = self._parse_term() 3264 else: 3265 offset = None 3266 3267 limit_exp = self.expression( 3268 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3269 ) 3270 3271 return limit_exp 3272 3273 if self._match(TokenType.FETCH): 3274 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3275 direction = self._prev.text.upper() if direction else "FIRST" 3276 3277 count = self._parse_field(tokens=self.FETCH_TOKENS) 3278 percent = self._match(TokenType.PERCENT) 3279 3280 self._match_set((TokenType.ROW, TokenType.ROWS)) 3281 3282 only = self._match_text_seq("ONLY") 3283 with_ties = self._match_text_seq("WITH", "TIES") 3284 3285 if only and with_ties: 3286 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3287 3288 return self.expression( 3289 exp.Fetch, 3290 direction=direction, 3291 count=count, 3292 percent=percent, 3293 with_ties=with_ties, 3294 ) 3295 3296 return this 3297 3298 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3299 if not self._match(TokenType.OFFSET): 3300 return this 3301 3302 count = self._parse_term() 3303 self._match_set((TokenType.ROW, TokenType.ROWS)) 3304 return self.expression(exp.Offset, this=this, expression=count) 3305 3306 def _parse_locks(self) -> t.List[exp.Lock]: 3307 locks = [] 3308 while True: 3309 if self._match_text_seq("FOR", "UPDATE"): 3310 update = True 3311 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3312 "LOCK", "IN", "SHARE", "MODE" 3313 ): 3314 update = False 3315 else: 3316 break 3317 3318 expressions = None 3319 if self._match_text_seq("OF"): 3320 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3321 3322 wait: t.Optional[bool | exp.Expression] = None 3323 if self._match_text_seq("NOWAIT"): 3324 wait = True 3325 elif self._match_text_seq("WAIT"): 3326 wait = self._parse_primary() 3327 elif self._match_text_seq("SKIP", "LOCKED"): 3328 wait = False 3329 3330 
locks.append( 3331 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3332 ) 3333 3334 return locks 3335 3336 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3337 while this and self._match_set(self.SET_OPERATIONS): 3338 token_type = self._prev.token_type 3339 3340 if token_type == TokenType.UNION: 3341 operation = exp.Union 3342 elif token_type == TokenType.EXCEPT: 3343 operation = exp.Except 3344 else: 3345 operation = exp.Intersect 3346 3347 comments = self._prev.comments 3348 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3349 by_name = self._match_text_seq("BY", "NAME") 3350 expression = self._parse_select(nested=True, parse_set_operation=False) 3351 3352 this = self.expression( 3353 operation, 3354 comments=comments, 3355 this=this, 3356 distinct=distinct, 3357 by_name=by_name, 3358 expression=expression, 3359 ) 3360 3361 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3362 expression = this.expression 3363 3364 if expression: 3365 for arg in self.UNION_MODIFIERS: 3366 expr = expression.args.get(arg) 3367 if expr: 3368 this.set(arg, expr.pop()) 3369 3370 return this 3371 3372 def _parse_expression(self) -> t.Optional[exp.Expression]: 3373 return self._parse_alias(self._parse_conjunction()) 3374 3375 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3376 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3377 3378 def _parse_equality(self) -> t.Optional[exp.Expression]: 3379 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3380 3381 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3382 return self._parse_tokens(self._parse_range, self.COMPARISON) 3383 3384 def _parse_range(self) -> t.Optional[exp.Expression]: 3385 this = self._parse_bitwise() 3386 negate = self._match(TokenType.NOT) 3387 3388 if self._match_set(self.RANGE_PARSERS): 3389 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3390 if not expression: 3391 return this 3392 3393 this = expression 3394 elif self._match(TokenType.ISNULL): 3395 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3396 3397 # Postgres supports ISNULL and NOTNULL for conditions. 
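# For example, `x ISNULL` was turned by the branch just above into
# Is(this=x, expression=Null()), while `x NOTNULL` becomes
# Not(this=Is(this=x, expression=Null())) in the branch below
# (an illustrative note on these Postgres-only shorthands).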
3398 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3399 if self._match(TokenType.NOTNULL): 3400 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3401 this = self.expression(exp.Not, this=this) 3402 3403 if negate: 3404 this = self.expression(exp.Not, this=this) 3405 3406 if self._match(TokenType.IS): 3407 this = self._parse_is(this) 3408 3409 return this 3410 3411 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3412 index = self._index - 1 3413 negate = self._match(TokenType.NOT) 3414 3415 if self._match_text_seq("DISTINCT", "FROM"): 3416 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3417 return self.expression(klass, this=this, expression=self._parse_conjunction()) 3418 3419 expression = self._parse_null() or self._parse_boolean() 3420 if not expression: 3421 self._retreat(index) 3422 return None 3423 3424 this = self.expression(exp.Is, this=this, expression=expression) 3425 return self.expression(exp.Not, this=this) if negate else this 3426 3427 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3428 unnest = self._parse_unnest(with_alias=False) 3429 if unnest: 3430 this = self.expression(exp.In, this=this, unnest=unnest) 3431 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3432 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3433 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3434 3435 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3436 this = self.expression(exp.In, this=this, query=expressions[0]) 3437 else: 3438 this = self.expression(exp.In, this=this, expressions=expressions) 3439 3440 if matched_l_paren: 3441 self._match_r_paren(this) 3442 elif not self._match(TokenType.R_BRACKET, expression=this): 3443 self.raise_error("Expecting ]") 3444 else: 3445 this = self.expression(exp.In, this=this, field=self._parse_field()) 3446 3447 return this 3448 3449 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3450 low = self._parse_bitwise() 3451 self._match(TokenType.AND) 3452 high = self._parse_bitwise() 3453 return self.expression(exp.Between, this=this, low=low, high=high) 3454 3455 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3456 if not self._match(TokenType.ESCAPE): 3457 return this 3458 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3459 3460 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3461 index = self._index 3462 3463 if not self._match(TokenType.INTERVAL) and match_interval: 3464 return None 3465 3466 if self._match(TokenType.STRING, advance=False): 3467 this = self._parse_primary() 3468 else: 3469 this = self._parse_term() 3470 3471 if not this or ( 3472 isinstance(this, exp.Column) 3473 and not this.table 3474 and not this.this.quoted 3475 and this.name.upper() == "IS" 3476 ): 3477 self._retreat(index) 3478 return None 3479 3480 unit = self._parse_function() or ( 3481 not self._match(TokenType.ALIAS, advance=False) 3482 and self._parse_var(any_token=True, upper=True) 3483 ) 3484 3485 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3486 # each INTERVAL expression into this canonical form so it's easy to transpile 3487 if this and this.is_number: 3488 this = exp.Literal.string(this.name) 3489 elif this and this.is_string: 3490 parts = this.name.split() 3491 3492 if len(parts) == 2: 3493 if unit: 3494 # This 
is not actually a unit, it's something else (e.g. a "window side") 3495 unit = None 3496 self._retreat(self._index - 1) 3497 3498 this = exp.Literal.string(parts[0]) 3499 unit = self.expression(exp.Var, this=parts[1].upper()) 3500 3501 return self.expression(exp.Interval, this=this, unit=unit) 3502 3503 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3504 this = self._parse_term() 3505 3506 while True: 3507 if self._match_set(self.BITWISE): 3508 this = self.expression( 3509 self.BITWISE[self._prev.token_type], 3510 this=this, 3511 expression=self._parse_term(), 3512 ) 3513 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3514 this = self.expression( 3515 exp.DPipe, 3516 this=this, 3517 expression=self._parse_term(), 3518 safe=not self.dialect.STRICT_STRING_CONCAT, 3519 ) 3520 elif self._match(TokenType.DQMARK): 3521 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3522 elif self._match_pair(TokenType.LT, TokenType.LT): 3523 this = self.expression( 3524 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3525 ) 3526 elif self._match_pair(TokenType.GT, TokenType.GT): 3527 this = self.expression( 3528 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3529 ) 3530 else: 3531 break 3532 3533 return this 3534 3535 def _parse_term(self) -> t.Optional[exp.Expression]: 3536 return self._parse_tokens(self._parse_factor, self.TERM) 3537 3538 def _parse_factor(self) -> t.Optional[exp.Expression]: 3539 if self.EXPONENT: 3540 factor = self._parse_tokens(self._parse_exponent, self.FACTOR) 3541 else: 3542 factor = self._parse_tokens(self._parse_unary, self.FACTOR) 3543 if isinstance(factor, exp.Div): 3544 factor.args["typed"] = self.dialect.TYPED_DIVISION 3545 factor.args["safe"] = self.dialect.SAFE_DIVISION 3546 return factor 3547 3548 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3549 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3550 3551 def _parse_unary(self) -> t.Optional[exp.Expression]: 3552 if self._match_set(self.UNARY_PARSERS): 3553 return self.UNARY_PARSERS[self._prev.token_type](self) 3554 return self._parse_at_time_zone(self._parse_type()) 3555 3556 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3557 interval = parse_interval and self._parse_interval() 3558 if interval: 3559 # Convert INTERVAL 'val_1' unit_1 ... 
'val_n' unit_n into a sum of intervals 3560 while self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 3561 interval = self.expression( # type: ignore 3562 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 3563 ) 3564 3565 return interval 3566 3567 index = self._index 3568 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3569 this = self._parse_column() 3570 3571 if data_type: 3572 if isinstance(this, exp.Literal): 3573 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3574 if parser: 3575 return parser(self, this, data_type) 3576 return self.expression(exp.Cast, this=this, to=data_type) 3577 if not data_type.expressions: 3578 self._retreat(index) 3579 return self._parse_column() 3580 return self._parse_column_ops(data_type) 3581 3582 return this and self._parse_column_ops(this) 3583 3584 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3585 this = self._parse_type() 3586 if not this: 3587 return None 3588 3589 return self.expression( 3590 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3591 ) 3592 3593 def _parse_types( 3594 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3595 ) -> t.Optional[exp.Expression]: 3596 index = self._index 3597 3598 prefix = self._match_text_seq("SYSUDTLIB", ".") 3599 3600 if not self._match_set(self.TYPE_TOKENS): 3601 identifier = allow_identifiers and self._parse_id_var( 3602 any_token=False, tokens=(TokenType.VAR,) 3603 ) 3604 3605 if identifier: 3606 tokens = self.dialect.tokenize(identifier.name) 3607 3608 if len(tokens) != 1: 3609 self.raise_error("Unexpected identifier", self._prev) 3610 3611 if tokens[0].token_type in self.TYPE_TOKENS: 3612 self._prev = tokens[0] 3613 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 3614 type_name = identifier.name 3615 3616 while self._match(TokenType.DOT): 3617 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3618 3619 return exp.DataType.build(type_name, udt=True) 3620 else: 3621 return None 3622 else: 3623 return None 3624 3625 type_token = self._prev.token_type 3626 3627 if type_token == TokenType.PSEUDO_TYPE: 3628 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 3629 3630 if type_token == TokenType.OBJECT_IDENTIFIER: 3631 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 3632 3633 nested = type_token in self.NESTED_TYPE_TOKENS 3634 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3635 expressions = None 3636 maybe_func = False 3637 3638 if self._match(TokenType.L_PAREN): 3639 if is_struct: 3640 expressions = self._parse_csv(self._parse_struct_types) 3641 elif nested: 3642 expressions = self._parse_csv( 3643 lambda: self._parse_types( 3644 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3645 ) 3646 ) 3647 elif type_token in self.ENUM_TYPE_TOKENS: 3648 expressions = self._parse_csv(self._parse_equality) 3649 else: 3650 expressions = self._parse_csv(self._parse_type_size) 3651 3652 if not expressions or not self._match(TokenType.R_PAREN): 3653 self._retreat(index) 3654 return None 3655 3656 maybe_func = True 3657 3658 this: t.Optional[exp.Expression] = None 3659 values: t.Optional[t.List[exp.Expression]] = None 3660 3661 if nested and self._match(TokenType.LT): 3662 if is_struct: 3663 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 3664 else: 3665 expressions = self._parse_csv( 3666 lambda: self._parse_types( 3667 check_func=check_func, 
schema=schema, allow_identifiers=allow_identifiers 3668 ) 3669 ) 3670 3671 if not self._match(TokenType.GT): 3672 self.raise_error("Expecting >") 3673 3674 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3675 values = self._parse_csv(self._parse_conjunction) 3676 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3677 3678 if type_token in self.TIMESTAMPS: 3679 if self._match_text_seq("WITH", "TIME", "ZONE"): 3680 maybe_func = False 3681 tz_type = ( 3682 exp.DataType.Type.TIMETZ 3683 if type_token in self.TIMES 3684 else exp.DataType.Type.TIMESTAMPTZ 3685 ) 3686 this = exp.DataType(this=tz_type, expressions=expressions) 3687 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3688 maybe_func = False 3689 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3690 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3691 maybe_func = False 3692 elif type_token == TokenType.INTERVAL: 3693 unit = self._parse_var() 3694 3695 if self._match_text_seq("TO"): 3696 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3697 else: 3698 span = None 3699 3700 if span or not unit: 3701 this = self.expression( 3702 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3703 ) 3704 else: 3705 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 3706 3707 if maybe_func and check_func: 3708 index2 = self._index 3709 peek = self._parse_string() 3710 3711 if not peek: 3712 self._retreat(index) 3713 return None 3714 3715 self._retreat(index2) 3716 3717 if not this: 3718 if self._match_text_seq("UNSIGNED"): 3719 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3720 if not unsigned_type_token: 3721 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3722 3723 type_token = unsigned_type_token or type_token 3724 3725 this = exp.DataType( 3726 this=exp.DataType.Type[type_token.value], 3727 expressions=expressions, 3728 nested=nested, 3729 values=values, 3730 prefix=prefix, 3731 ) 3732 3733 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3734 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3735 3736 return this 3737 3738 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 3739 index = self._index 3740 this = self._parse_type(parse_interval=False) or self._parse_id_var() 3741 self._match(TokenType.COLON) 3742 column_def = self._parse_column_def(this) 3743 3744 if type_required and ( 3745 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 3746 ): 3747 self._retreat(index) 3748 return self._parse_types() 3749 3750 return column_def 3751 3752 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3753 if not self._match_text_seq("AT", "TIME", "ZONE"): 3754 return this 3755 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3756 3757 def _parse_column(self) -> t.Optional[exp.Expression]: 3758 this = self._parse_field() 3759 if isinstance(this, exp.Identifier): 3760 this = self.expression(exp.Column, this=this) 3761 elif not this: 3762 return self._parse_bracket(this) 3763 return self._parse_column_ops(this) 3764 3765 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3766 this = self._parse_bracket(this) 3767 3768 while self._match_set(self.COLUMN_OPERATORS): 3769 op_token = self._prev.token_type 3770 op = self.COLUMN_OPERATORS.get(op_token) 3771 
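# An illustrative sketch of the dispatch below: for `x::int`, op_token is
# DCOLON, so a type is parsed and the matching COLUMN_OPERATORS entry builds a
# cast of `this`; for a dotted chain like `tbl.col.fld`, DOT carries no
# operator callable, so `op` is None and the Column re-nesting branch shifts
# table/db/catalog up one level and makes the new field the column name.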
3772 if op_token == TokenType.DCOLON: 3773 field = self._parse_types() 3774 if not field: 3775 self.raise_error("Expected type") 3776 elif op and self._curr: 3777 self._advance() 3778 value = self._prev.text 3779 field = ( 3780 exp.Literal.number(value) 3781 if self._prev.token_type == TokenType.NUMBER 3782 else exp.Literal.string(value) 3783 ) 3784 else: 3785 field = self._parse_field(anonymous_func=True, any_token=True) 3786 3787 if isinstance(field, exp.Func): 3788 # bigquery allows function calls like x.y.count(...) 3789 # SAFE.SUBSTR(...) 3790 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3791 this = self._replace_columns_with_dots(this) 3792 3793 if op: 3794 this = op(self, this, field) 3795 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3796 this = self.expression( 3797 exp.Column, 3798 this=field, 3799 table=this.this, 3800 db=this.args.get("table"), 3801 catalog=this.args.get("db"), 3802 ) 3803 else: 3804 this = self.expression(exp.Dot, this=this, expression=field) 3805 this = self._parse_bracket(this) 3806 return this 3807 3808 def _parse_primary(self) -> t.Optional[exp.Expression]: 3809 if self._match_set(self.PRIMARY_PARSERS): 3810 token_type = self._prev.token_type 3811 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3812 3813 if token_type == TokenType.STRING: 3814 expressions = [primary] 3815 while self._match(TokenType.STRING): 3816 expressions.append(exp.Literal.string(self._prev.text)) 3817 3818 if len(expressions) > 1: 3819 return self.expression(exp.Concat, expressions=expressions) 3820 3821 return primary 3822 3823 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3824 return exp.Literal.number(f"0.{self._prev.text}") 3825 3826 if self._match(TokenType.L_PAREN): 3827 comments = self._prev_comments 3828 query = self._parse_select() 3829 3830 if query: 3831 expressions = [query] 3832 else: 3833 expressions = self._parse_expressions() 3834 3835 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3836 3837 if isinstance(this, exp.Subqueryable): 3838 this = self._parse_set_operations( 3839 self._parse_subquery(this=this, parse_alias=False) 3840 ) 3841 elif len(expressions) > 1: 3842 this = self.expression(exp.Tuple, expressions=expressions) 3843 else: 3844 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3845 3846 if this: 3847 this.add_comments(comments) 3848 3849 self._match_r_paren(expression=this) 3850 return this 3851 3852 return None 3853 3854 def _parse_field( 3855 self, 3856 any_token: bool = False, 3857 tokens: t.Optional[t.Collection[TokenType]] = None, 3858 anonymous_func: bool = False, 3859 ) -> t.Optional[exp.Expression]: 3860 return ( 3861 self._parse_primary() 3862 or self._parse_function(anonymous=anonymous_func) 3863 or self._parse_id_var(any_token=any_token, tokens=tokens) 3864 ) 3865 3866 def _parse_function( 3867 self, 3868 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3869 anonymous: bool = False, 3870 optional_parens: bool = True, 3871 ) -> t.Optional[exp.Expression]: 3872 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 3873 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 3874 fn_syntax = False 3875 if ( 3876 self._match(TokenType.L_BRACE, advance=False) 3877 and self._next 3878 and self._next.text.upper() == "FN" 3879 ): 3880 self._advance(2) 3881 fn_syntax = True 3882 3883 func = self._parse_function_call( 3884 functions=functions, anonymous=anonymous, 
optional_parens=optional_parens 3885 ) 3886 3887 if fn_syntax: 3888 self._match(TokenType.R_BRACE) 3889 3890 return func 3891 3892 def _parse_function_call( 3893 self, 3894 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3895 anonymous: bool = False, 3896 optional_parens: bool = True, 3897 ) -> t.Optional[exp.Expression]: 3898 if not self._curr: 3899 return None 3900 3901 comments = self._curr.comments 3902 token_type = self._curr.token_type 3903 this = self._curr.text 3904 upper = this.upper() 3905 3906 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3907 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3908 self._advance() 3909 return parser(self) 3910 3911 if not self._next or self._next.token_type != TokenType.L_PAREN: 3912 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3913 self._advance() 3914 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3915 3916 return None 3917 3918 if token_type not in self.FUNC_TOKENS: 3919 return None 3920 3921 self._advance(2) 3922 3923 parser = self.FUNCTION_PARSERS.get(upper) 3924 if parser and not anonymous: 3925 this = parser(self) 3926 else: 3927 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3928 3929 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3930 this = self.expression(subquery_predicate, this=self._parse_select()) 3931 self._match_r_paren() 3932 return this 3933 3934 if functions is None: 3935 functions = self.FUNCTIONS 3936 3937 function = functions.get(upper) 3938 3939 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3940 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3941 3942 if function and not anonymous: 3943 if "dialect" in function.__code__.co_varnames: 3944 func = function(args, dialect=self.dialect) 3945 else: 3946 func = function(args) 3947 3948 func = self.validate_expression(func, args) 3949 if not self.dialect.NORMALIZE_FUNCTIONS: 3950 func.meta["name"] = this 3951 3952 this = func 3953 else: 3954 this = self.expression(exp.Anonymous, this=this, expressions=args) 3955 3956 if isinstance(this, exp.Expression): 3957 this.add_comments(comments) 3958 3959 self._match_r_paren(this) 3960 return self._parse_window(this) 3961 3962 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3963 return self._parse_column_def(self._parse_id_var()) 3964 3965 def _parse_user_defined_function( 3966 self, kind: t.Optional[TokenType] = None 3967 ) -> t.Optional[exp.Expression]: 3968 this = self._parse_id_var() 3969 3970 while self._match(TokenType.DOT): 3971 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3972 3973 if not self._match(TokenType.L_PAREN): 3974 return this 3975 3976 expressions = self._parse_csv(self._parse_function_parameter) 3977 self._match_r_paren() 3978 return self.expression( 3979 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3980 ) 3981 3982 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3983 literal = self._parse_primary() 3984 if literal: 3985 return self.expression(exp.Introducer, this=token.text, expression=literal) 3986 3987 return self.expression(exp.Identifier, this=token.text) 3988 3989 def _parse_session_parameter(self) -> exp.SessionParameter: 3990 kind = None 3991 this = self._parse_id_var() or self._parse_primary() 3992 3993 if this and self._match(TokenType.DOT): 3994 kind = this.name 3995 this = self._parse_var() or self._parse_primary() 3996 3997 return 
self.expression(exp.SessionParameter, this=this, kind=kind) 3998 3999 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4000 index = self._index 4001 4002 if self._match(TokenType.L_PAREN): 4003 expressions = t.cast( 4004 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4005 ) 4006 4007 if not self._match(TokenType.R_PAREN): 4008 self._retreat(index) 4009 else: 4010 expressions = [self._parse_id_var()] 4011 4012 if self._match_set(self.LAMBDAS): 4013 return self.LAMBDAS[self._prev.token_type](self, expressions) 4014 4015 self._retreat(index) 4016 4017 this: t.Optional[exp.Expression] 4018 4019 if self._match(TokenType.DISTINCT): 4020 this = self.expression( 4021 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4022 ) 4023 else: 4024 this = self._parse_select_or_expression(alias=alias) 4025 4026 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 4027 4028 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4029 index = self._index 4030 4031 if not self.errors: 4032 try: 4033 if self._parse_select(nested=True): 4034 return this 4035 except ParseError: 4036 pass 4037 finally: 4038 self.errors.clear() 4039 self._retreat(index) 4040 4041 if not self._match(TokenType.L_PAREN): 4042 return this 4043 4044 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4045 4046 self._match_r_paren() 4047 return self.expression(exp.Schema, this=this, expressions=args) 4048 4049 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4050 return self._parse_column_def(self._parse_field(any_token=True)) 4051 4052 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4053 # column defs are not really columns, they're identifiers 4054 if isinstance(this, exp.Column): 4055 this = this.this 4056 4057 kind = self._parse_types(schema=True) 4058 4059 if self._match_text_seq("FOR", "ORDINALITY"): 4060 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4061 4062 constraints: t.List[exp.Expression] = [] 4063 4064 if not kind and self._match(TokenType.ALIAS): 4065 constraints.append( 4066 self.expression( 4067 exp.ComputedColumnConstraint, 4068 this=self._parse_conjunction(), 4069 persisted=self._match_text_seq("PERSISTED"), 4070 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4071 ) 4072 ) 4073 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4074 self._match(TokenType.ALIAS) 4075 constraints.append( 4076 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4077 ) 4078 4079 while True: 4080 constraint = self._parse_column_constraint() 4081 if not constraint: 4082 break 4083 constraints.append(constraint) 4084 4085 if not kind and not constraints: 4086 return this 4087 4088 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4089 4090 def _parse_auto_increment( 4091 self, 4092 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4093 start = None 4094 increment = None 4095 4096 if self._match(TokenType.L_PAREN, advance=False): 4097 args = self._parse_wrapped_csv(self._parse_bitwise) 4098 start = seq_get(args, 0) 4099 increment = seq_get(args, 1) 4100 elif self._match_text_seq("START"): 4101 start = self._parse_bitwise() 4102 self._match_text_seq("INCREMENT") 4103 increment = self._parse_bitwise() 4104 4105 if start and increment: 4106 return 
exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4107 4108 return exp.AutoIncrementColumnConstraint() 4109 4110 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4111 if not self._match_text_seq("REFRESH"): 4112 self._retreat(self._index - 1) 4113 return None 4114 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4115 4116 def _parse_compress(self) -> exp.CompressColumnConstraint: 4117 if self._match(TokenType.L_PAREN, advance=False): 4118 return self.expression( 4119 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4120 ) 4121 4122 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4123 4124 def _parse_generated_as_identity( 4125 self, 4126 ) -> ( 4127 exp.GeneratedAsIdentityColumnConstraint 4128 | exp.ComputedColumnConstraint 4129 | exp.GeneratedAsRowColumnConstraint 4130 ): 4131 if self._match_text_seq("BY", "DEFAULT"): 4132 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4133 this = self.expression( 4134 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4135 ) 4136 else: 4137 self._match_text_seq("ALWAYS") 4138 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4139 4140 self._match(TokenType.ALIAS) 4141 4142 if self._match_text_seq("ROW"): 4143 start = self._match_text_seq("START") 4144 if not start: 4145 self._match(TokenType.END) 4146 hidden = self._match_text_seq("HIDDEN") 4147 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4148 4149 identity = self._match_text_seq("IDENTITY") 4150 4151 if self._match(TokenType.L_PAREN): 4152 if self._match(TokenType.START_WITH): 4153 this.set("start", self._parse_bitwise()) 4154 if self._match_text_seq("INCREMENT", "BY"): 4155 this.set("increment", self._parse_bitwise()) 4156 if self._match_text_seq("MINVALUE"): 4157 this.set("minvalue", self._parse_bitwise()) 4158 if self._match_text_seq("MAXVALUE"): 4159 this.set("maxvalue", self._parse_bitwise()) 4160 4161 if self._match_text_seq("CYCLE"): 4162 this.set("cycle", True) 4163 elif self._match_text_seq("NO", "CYCLE"): 4164 this.set("cycle", False) 4165 4166 if not identity: 4167 this.set("expression", self._parse_bitwise()) 4168 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4169 args = self._parse_csv(self._parse_bitwise) 4170 this.set("start", seq_get(args, 0)) 4171 this.set("increment", seq_get(args, 1)) 4172 4173 self._match_r_paren() 4174 4175 return this 4176 4177 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4178 self._match_text_seq("LENGTH") 4179 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4180 4181 def _parse_not_constraint( 4182 self, 4183 ) -> t.Optional[exp.Expression]: 4184 if self._match_text_seq("NULL"): 4185 return self.expression(exp.NotNullColumnConstraint) 4186 if self._match_text_seq("CASESPECIFIC"): 4187 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4188 if self._match_text_seq("FOR", "REPLICATION"): 4189 return self.expression(exp.NotForReplicationColumnConstraint) 4190 return None 4191 4192 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4193 if self._match(TokenType.CONSTRAINT): 4194 this = self._parse_id_var() 4195 else: 4196 this = None 4197 4198 if self._match_texts(self.CONSTRAINT_PARSERS): 4199 return self.expression( 4200 exp.ColumnConstraint, 4201 this=this, 4202 
kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4203 ) 4204 4205 return this 4206 4207 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4208 if not self._match(TokenType.CONSTRAINT): 4209 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4210 4211 this = self._parse_id_var() 4212 expressions = [] 4213 4214 while True: 4215 constraint = self._parse_unnamed_constraint() or self._parse_function() 4216 if not constraint: 4217 break 4218 expressions.append(constraint) 4219 4220 return self.expression(exp.Constraint, this=this, expressions=expressions) 4221 4222 def _parse_unnamed_constraint( 4223 self, constraints: t.Optional[t.Collection[str]] = None 4224 ) -> t.Optional[exp.Expression]: 4225 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4226 constraints or self.CONSTRAINT_PARSERS 4227 ): 4228 return None 4229 4230 constraint = self._prev.text.upper() 4231 if constraint not in self.CONSTRAINT_PARSERS: 4232 self.raise_error(f"No parser found for schema constraint {constraint}.") 4233 4234 return self.CONSTRAINT_PARSERS[constraint](self) 4235 4236 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4237 self._match_text_seq("KEY") 4238 return self.expression( 4239 exp.UniqueColumnConstraint, 4240 this=self._parse_schema(self._parse_id_var(any_token=False)), 4241 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4242 ) 4243 4244 def _parse_key_constraint_options(self) -> t.List[str]: 4245 options = [] 4246 while True: 4247 if not self._curr: 4248 break 4249 4250 if self._match(TokenType.ON): 4251 action = None 4252 on = self._advance_any() and self._prev.text 4253 4254 if self._match_text_seq("NO", "ACTION"): 4255 action = "NO ACTION" 4256 elif self._match_text_seq("CASCADE"): 4257 action = "CASCADE" 4258 elif self._match_text_seq("RESTRICT"): 4259 action = "RESTRICT" 4260 elif self._match_pair(TokenType.SET, TokenType.NULL): 4261 action = "SET NULL" 4262 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4263 action = "SET DEFAULT" 4264 else: 4265 self.raise_error("Invalid key constraint") 4266 4267 options.append(f"ON {on} {action}") 4268 elif self._match_text_seq("NOT", "ENFORCED"): 4269 options.append("NOT ENFORCED") 4270 elif self._match_text_seq("DEFERRABLE"): 4271 options.append("DEFERRABLE") 4272 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4273 options.append("INITIALLY DEFERRED") 4274 elif self._match_text_seq("NORELY"): 4275 options.append("NORELY") 4276 elif self._match_text_seq("MATCH", "FULL"): 4277 options.append("MATCH FULL") 4278 else: 4279 break 4280 4281 return options 4282 4283 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4284 if match and not self._match(TokenType.REFERENCES): 4285 return None 4286 4287 expressions = None 4288 this = self._parse_table(schema=True) 4289 options = self._parse_key_constraint_options() 4290 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4291 4292 def _parse_foreign_key(self) -> exp.ForeignKey: 4293 expressions = self._parse_wrapped_id_vars() 4294 reference = self._parse_references() 4295 options = {} 4296 4297 while self._match(TokenType.ON): 4298 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4299 self.raise_error("Expected DELETE or UPDATE") 4300 4301 kind = self._prev.text.lower() 4302 4303 if self._match_text_seq("NO", "ACTION"): 4304 action = "NO ACTION" 4305 elif self._match(TokenType.SET): 4306 
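# e.g. FOREIGN KEY (id) REFERENCES t (id) ON DELETE SET NULL takes this
# branch: kind is "delete", the matched NULL token makes action "SET NULL",
# and the loop stores options["delete"] = "SET NULL" (an illustrative note).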
self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4307 action = "SET " + self._prev.text.upper() 4308 else: 4309 self._advance() 4310 action = self._prev.text.upper() 4311 4312 options[kind] = action 4313 4314 return self.expression( 4315 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 4316 ) 4317 4318 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4319 return self._parse_field() 4320 4321 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4322 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4323 self._retreat(self._index - 1) 4324 return None 4325 4326 id_vars = self._parse_wrapped_id_vars() 4327 return self.expression( 4328 exp.PeriodForSystemTimeConstraint, 4329 this=seq_get(id_vars, 0), 4330 expression=seq_get(id_vars, 1), 4331 ) 4332 4333 def _parse_primary_key( 4334 self, wrapped_optional: bool = False, in_props: bool = False 4335 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4336 desc = ( 4337 self._match_set((TokenType.ASC, TokenType.DESC)) 4338 and self._prev.token_type == TokenType.DESC 4339 ) 4340 4341 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4342 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4343 4344 expressions = self._parse_wrapped_csv( 4345 self._parse_primary_key_part, optional=wrapped_optional 4346 ) 4347 options = self._parse_key_constraint_options() 4348 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4349 4350 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4351 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4352 4353 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4354 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4355 return this 4356 4357 bracket_kind = self._prev.token_type 4358 expressions = self._parse_csv( 4359 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4360 ) 4361 4362 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4363 self.raise_error("Expected ]") 4364 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4365 self.raise_error("Expected }") 4366 4367 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4368 if bracket_kind == TokenType.L_BRACE: 4369 this = self.expression(exp.Struct, expressions=expressions) 4370 elif not this or this.name.upper() == "ARRAY": 4371 this = self.expression(exp.Array, expressions=expressions) 4372 else: 4373 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4374 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4375 4376 self._add_comments(this) 4377 return self._parse_bracket(this) 4378 4379 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4380 if self._match(TokenType.COLON): 4381 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4382 return this 4383 4384 def _parse_case(self) -> t.Optional[exp.Expression]: 4385 ifs = [] 4386 default = None 4387 4388 comments = self._prev_comments 4389 expression = self._parse_conjunction() 4390 4391 while self._match(TokenType.WHEN): 4392 this = self._parse_conjunction() 4393 self._match(TokenType.THEN) 4394 then = self._parse_conjunction() 4395 ifs.append(self.expression(exp.If, this=this, true=then)) 4396 4397 if 
self._match(TokenType.ELSE): 4398 default = self._parse_conjunction() 4399 4400 if not self._match(TokenType.END): 4401 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4402 default = exp.column("interval") 4403 else: 4404 self.raise_error("Expected END after CASE", self._prev) 4405 4406 return self._parse_window( 4407 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4408 ) 4409 4410 def _parse_if(self) -> t.Optional[exp.Expression]: 4411 if self._match(TokenType.L_PAREN): 4412 args = self._parse_csv(self._parse_conjunction) 4413 this = self.validate_expression(exp.If.from_arg_list(args), args) 4414 self._match_r_paren() 4415 else: 4416 index = self._index - 1 4417 condition = self._parse_conjunction() 4418 4419 if not condition: 4420 self._retreat(index) 4421 return None 4422 4423 self._match(TokenType.THEN) 4424 true = self._parse_conjunction() 4425 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4426 self._match(TokenType.END) 4427 this = self.expression(exp.If, this=condition, true=true, false=false) 4428 4429 return self._parse_window(this) 4430 4431 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4432 if not self._match_text_seq("VALUE", "FOR"): 4433 self._retreat(self._index - 1) 4434 return None 4435 4436 return self.expression( 4437 exp.NextValueFor, 4438 this=self._parse_column(), 4439 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4440 ) 4441 4442 def _parse_extract(self) -> exp.Extract: 4443 this = self._parse_function() or self._parse_var() or self._parse_type() 4444 4445 if self._match(TokenType.FROM): 4446 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4447 4448 if not self._match(TokenType.COMMA): 4449 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4450 4451 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4452 4453 def _parse_any_value(self) -> exp.AnyValue: 4454 this = self._parse_lambda() 4455 is_max = None 4456 having = None 4457 4458 if self._match(TokenType.HAVING): 4459 self._match_texts(("MAX", "MIN")) 4460 is_max = self._prev.text == "MAX" 4461 having = self._parse_column() 4462 4463 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4464 4465 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4466 this = self._parse_conjunction() 4467 4468 if not self._match(TokenType.ALIAS): 4469 if self._match(TokenType.COMMA): 4470 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4471 4472 self.raise_error("Expected AS after CAST") 4473 4474 fmt = None 4475 to = self._parse_types() 4476 4477 if self._match(TokenType.FORMAT): 4478 fmt_string = self._parse_string() 4479 fmt = self._parse_at_time_zone(fmt_string) 4480 4481 if not to: 4482 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4483 if to.this in exp.DataType.TEMPORAL_TYPES: 4484 this = self.expression( 4485 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4486 this=this, 4487 format=exp.Literal.string( 4488 format_time( 4489 fmt_string.this if fmt_string else "", 4490 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 4491 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 4492 ) 4493 ), 4494 ) 4495 4496 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4497 this.set("zone", fmt.args["zone"]) 4498 return this 4499 elif not to: 4500 
self.raise_error("Expected TYPE after CAST") 4501 elif isinstance(to, exp.Identifier): 4502 to = exp.DataType.build(to.name, udt=True) 4503 elif to.this == exp.DataType.Type.CHAR: 4504 if self._match(TokenType.CHARACTER_SET): 4505 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4506 4507 return self.expression( 4508 exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe 4509 ) 4510 4511 def _parse_string_agg(self) -> exp.Expression: 4512 if self._match(TokenType.DISTINCT): 4513 args: t.List[t.Optional[exp.Expression]] = [ 4514 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4515 ] 4516 if self._match(TokenType.COMMA): 4517 args.extend(self._parse_csv(self._parse_conjunction)) 4518 else: 4519 args = self._parse_csv(self._parse_conjunction) # type: ignore 4520 4521 index = self._index 4522 if not self._match(TokenType.R_PAREN) and args: 4523 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4524 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4525 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4526 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4527 4528 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4529 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4530 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4531 if not self._match_text_seq("WITHIN", "GROUP"): 4532 self._retreat(index) 4533 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4534 4535 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4536 order = self._parse_order(this=seq_get(args, 0)) 4537 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4538 4539 def _parse_convert( 4540 self, strict: bool, safe: t.Optional[bool] = None 4541 ) -> t.Optional[exp.Expression]: 4542 this = self._parse_bitwise() 4543 4544 if self._match(TokenType.USING): 4545 to: t.Optional[exp.Expression] = self.expression( 4546 exp.CharacterSet, this=self._parse_var() 4547 ) 4548 elif self._match(TokenType.COMMA): 4549 to = self._parse_types() 4550 else: 4551 to = None 4552 4553 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4554 4555 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4556 """ 4557 There are generally two variants of the DECODE function: 4558 4559 - DECODE(bin, charset) 4560 - DECODE(expression, search, result [, search, result] ... [, default]) 4561 4562 The second variant will always be parsed into a CASE expression. Note that NULL 4563 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4564 instead of relying on pattern matching. 
4565 """ 4566 args = self._parse_csv(self._parse_conjunction) 4567 4568 if len(args) < 3: 4569 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4570 4571 expression, *expressions = args 4572 if not expression: 4573 return None 4574 4575 ifs = [] 4576 for search, result in zip(expressions[::2], expressions[1::2]): 4577 if not search or not result: 4578 return None 4579 4580 if isinstance(search, exp.Literal): 4581 ifs.append( 4582 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4583 ) 4584 elif isinstance(search, exp.Null): 4585 ifs.append( 4586 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4587 ) 4588 else: 4589 cond = exp.or_( 4590 exp.EQ(this=expression.copy(), expression=search), 4591 exp.and_( 4592 exp.Is(this=expression.copy(), expression=exp.Null()), 4593 exp.Is(this=search.copy(), expression=exp.Null()), 4594 copy=False, 4595 ), 4596 copy=False, 4597 ) 4598 ifs.append(exp.If(this=cond, true=result)) 4599 4600 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4601 4602 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4603 self._match_text_seq("KEY") 4604 key = self._parse_column() 4605 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 4606 self._match_text_seq("VALUE") 4607 value = self._parse_bitwise() 4608 4609 if not key and not value: 4610 return None 4611 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4612 4613 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4614 if not this or not self._match_text_seq("FORMAT", "JSON"): 4615 return this 4616 4617 return self.expression(exp.FormatJson, this=this) 4618 4619 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4620 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 4621 for value in values: 4622 if self._match_text_seq(value, "ON", on): 4623 return f"{value} ON {on}" 4624 4625 return None 4626 4627 @t.overload 4628 def _parse_json_object(self, agg: Literal[False]) -> exp.JSONObject: 4629 ... 4630 4631 @t.overload 4632 def _parse_json_object(self, agg: Literal[True]) -> exp.JSONObjectAgg: 4633 ... 
4634 4635 def _parse_json_object(self, agg=False): 4636 star = self._parse_star() 4637 expressions = ( 4638 [star] 4639 if star 4640 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4641 ) 4642 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4643 4644 unique_keys = None 4645 if self._match_text_seq("WITH", "UNIQUE"): 4646 unique_keys = True 4647 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4648 unique_keys = False 4649 4650 self._match_text_seq("KEYS") 4651 4652 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4653 self._parse_type() 4654 ) 4655 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4656 4657 return self.expression( 4658 exp.JSONObjectAgg if agg else exp.JSONObject, 4659 expressions=expressions, 4660 null_handling=null_handling, 4661 unique_keys=unique_keys, 4662 return_type=return_type, 4663 encoding=encoding, 4664 ) 4665 4666 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4667 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4668 if not self._match_text_seq("NESTED"): 4669 this = self._parse_id_var() 4670 kind = self._parse_types(allow_identifiers=False) 4671 nested = None 4672 else: 4673 this = None 4674 kind = None 4675 nested = True 4676 4677 path = self._match_text_seq("PATH") and self._parse_string() 4678 nested_schema = nested and self._parse_json_schema() 4679 4680 return self.expression( 4681 exp.JSONColumnDef, 4682 this=this, 4683 kind=kind, 4684 path=path, 4685 nested_schema=nested_schema, 4686 ) 4687 4688 def _parse_json_schema(self) -> exp.JSONSchema: 4689 self._match_text_seq("COLUMNS") 4690 return self.expression( 4691 exp.JSONSchema, 4692 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 4693 ) 4694 4695 def _parse_json_table(self) -> exp.JSONTable: 4696 this = self._parse_format_json(self._parse_bitwise()) 4697 path = self._match(TokenType.COMMA) and self._parse_string() 4698 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 4699 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 4700 schema = self._parse_json_schema() 4701 4702 return exp.JSONTable( 4703 this=this, 4704 schema=schema, 4705 path=path, 4706 error_handling=error_handling, 4707 empty_handling=empty_handling, 4708 ) 4709 4710 def _parse_match_against(self) -> exp.MatchAgainst: 4711 expressions = self._parse_csv(self._parse_column) 4712 4713 self._match_text_seq(")", "AGAINST", "(") 4714 4715 this = self._parse_string() 4716 4717 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4718 modifier = "IN NATURAL LANGUAGE MODE" 4719 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4720 modifier = f"{modifier} WITH QUERY EXPANSION" 4721 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4722 modifier = "IN BOOLEAN MODE" 4723 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4724 modifier = "WITH QUERY EXPANSION" 4725 else: 4726 modifier = None 4727 4728 return self.expression( 4729 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4730 ) 4731 4732 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4733 def _parse_open_json(self) -> exp.OpenJSON: 4734 this = self._parse_bitwise() 4735 path = self._match(TokenType.COMMA) and self._parse_string() 4736 4737 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4738 this = self._parse_field(any_token=True) 4739 kind = self._parse_types() 4740 path = 
self._parse_string() 4741 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4742 4743 return self.expression( 4744 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4745 ) 4746 4747 expressions = None 4748 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4749 self._match_l_paren() 4750 expressions = self._parse_csv(_parse_open_json_column_def) 4751 4752 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4753 4754 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4755 args = self._parse_csv(self._parse_bitwise) 4756 4757 if self._match(TokenType.IN): 4758 return self.expression( 4759 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4760 ) 4761 4762 if haystack_first: 4763 haystack = seq_get(args, 0) 4764 needle = seq_get(args, 1) 4765 else: 4766 needle = seq_get(args, 0) 4767 haystack = seq_get(args, 1) 4768 4769 return self.expression( 4770 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4771 ) 4772 4773 def _parse_predict(self) -> exp.Predict: 4774 self._match_text_seq("MODEL") 4775 this = self._parse_table() 4776 4777 self._match(TokenType.COMMA) 4778 self._match_text_seq("TABLE") 4779 4780 return self.expression( 4781 exp.Predict, 4782 this=this, 4783 expression=self._parse_table(), 4784 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 4785 ) 4786 4787 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4788 args = self._parse_csv(self._parse_table) 4789 return exp.JoinHint(this=func_name.upper(), expressions=args) 4790 4791 def _parse_substring(self) -> exp.Substring: 4792 # Postgres supports the form: substring(string [from int] [for int]) 4793 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4794 4795 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4796 4797 if self._match(TokenType.FROM): 4798 args.append(self._parse_bitwise()) 4799 if self._match(TokenType.FOR): 4800 args.append(self._parse_bitwise()) 4801 4802 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4803 4804 def _parse_trim(self) -> exp.Trim: 4805 # https://www.w3resource.com/sql/character-functions/trim.php 4806 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4807 4808 position = None 4809 collation = None 4810 expression = None 4811 4812 if self._match_texts(self.TRIM_TYPES): 4813 position = self._prev.text.upper() 4814 4815 this = self._parse_bitwise() 4816 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4817 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 4818 expression = self._parse_bitwise() 4819 4820 if invert_order: 4821 this, expression = expression, this 4822 4823 if self._match(TokenType.COLLATE): 4824 collation = self._parse_bitwise() 4825 4826 return self.expression( 4827 exp.Trim, this=this, position=position, expression=expression, collation=collation 4828 ) 4829 4830 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4831 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4832 4833 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4834 return self._parse_window(self._parse_id_var(), alias=True) 4835 4836 def _parse_respect_or_ignore_nulls( 4837 self, this: t.Optional[exp.Expression] 4838 ) -> t.Optional[exp.Expression]: 4839 if self._match_text_seq("IGNORE", "NULLS"): 4840 return self.expression(exp.IgnoreNulls, this=this) 4841 if 
self._match_text_seq("RESPECT", "NULLS"): 4842 return self.expression(exp.RespectNulls, this=this) 4843 return this 4844 4845 def _parse_window( 4846 self, this: t.Optional[exp.Expression], alias: bool = False 4847 ) -> t.Optional[exp.Expression]: 4848 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4849 self._match(TokenType.WHERE) 4850 this = self.expression( 4851 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4852 ) 4853 self._match_r_paren() 4854 4855 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4856 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4857 if self._match_text_seq("WITHIN", "GROUP"): 4858 order = self._parse_wrapped(self._parse_order) 4859 this = self.expression(exp.WithinGroup, this=this, expression=order) 4860 4861 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4862 # Some dialects choose to implement and some do not. 4863 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4864 4865 # There is some code above in _parse_lambda that handles 4866 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4867 4868 # The below changes handle 4869 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4870 4871 # Oracle allows both formats 4872 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4873 # and Snowflake chose to do the same for familiarity 4874 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4875 this = self._parse_respect_or_ignore_nulls(this) 4876 4877 # bigquery select from window x AS (partition by ...) 4878 if alias: 4879 over = None 4880 self._match(TokenType.ALIAS) 4881 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4882 return this 4883 else: 4884 over = self._prev.text.upper() 4885 4886 if not self._match(TokenType.L_PAREN): 4887 return self.expression( 4888 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4889 ) 4890 4891 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4892 4893 first = self._match(TokenType.FIRST) 4894 if self._match_text_seq("LAST"): 4895 first = False 4896 4897 partition, order = self._parse_partition_and_order() 4898 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4899 4900 if kind: 4901 self._match(TokenType.BETWEEN) 4902 start = self._parse_window_spec() 4903 self._match(TokenType.AND) 4904 end = self._parse_window_spec() 4905 4906 spec = self.expression( 4907 exp.WindowSpec, 4908 kind=kind, 4909 start=start["value"], 4910 start_side=start["side"], 4911 end=end["value"], 4912 end_side=end["side"], 4913 ) 4914 else: 4915 spec = None 4916 4917 self._match_r_paren() 4918 4919 window = self.expression( 4920 exp.Window, 4921 this=this, 4922 partition_by=partition, 4923 order=order, 4924 spec=spec, 4925 alias=window_alias, 4926 over=over, 4927 first=first, 4928 ) 4929 4930 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
4931 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4932 return self._parse_window(window, alias=alias) 4933 4934 return window 4935 4936 def _parse_partition_and_order( 4937 self, 4938 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4939 return self._parse_partition_by(), self._parse_order() 4940 4941 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4942 self._match(TokenType.BETWEEN) 4943 4944 return { 4945 "value": ( 4946 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4947 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4948 or self._parse_bitwise() 4949 ), 4950 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4951 } 4952 4953 def _parse_alias( 4954 self, this: t.Optional[exp.Expression], explicit: bool = False 4955 ) -> t.Optional[exp.Expression]: 4956 any_token = self._match(TokenType.ALIAS) 4957 comments = self._prev_comments 4958 4959 if explicit and not any_token: 4960 return this 4961 4962 if self._match(TokenType.L_PAREN): 4963 aliases = self.expression( 4964 exp.Aliases, 4965 comments=comments, 4966 this=this, 4967 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4968 ) 4969 self._match_r_paren(aliases) 4970 return aliases 4971 4972 alias = self._parse_id_var(any_token) or ( 4973 self.STRING_ALIASES and self._parse_string_as_identifier() 4974 ) 4975 4976 if alias: 4977 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 4978 4979 # Moves the comment next to the alias in `expr /* comment */ AS alias` 4980 if not this.comments and this.this.comments: 4981 this.comments = this.this.comments 4982 this.this.comments = None 4983 4984 return this 4985 4986 def _parse_id_var( 4987 self, 4988 any_token: bool = True, 4989 tokens: t.Optional[t.Collection[TokenType]] = None, 4990 ) -> t.Optional[exp.Expression]: 4991 identifier = self._parse_identifier() 4992 4993 if identifier: 4994 return identifier 4995 4996 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4997 quoted = self._prev.token_type == TokenType.STRING 4998 return exp.Identifier(this=self._prev.text, quoted=quoted) 4999 5000 return None 5001 5002 def _parse_string(self) -> t.Optional[exp.Expression]: 5003 if self._match_set((TokenType.STRING, TokenType.RAW_STRING)): 5004 return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev) 5005 return self._parse_placeholder() 5006 5007 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5008 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5009 5010 def _parse_number(self) -> t.Optional[exp.Expression]: 5011 if self._match(TokenType.NUMBER): 5012 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 5013 return self._parse_placeholder() 5014 5015 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5016 if self._match(TokenType.IDENTIFIER): 5017 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5018 return self._parse_placeholder() 5019 5020 def _parse_var( 5021 self, 5022 any_token: bool = False, 5023 tokens: t.Optional[t.Collection[TokenType]] = None, 5024 upper: bool = False, 5025 ) -> t.Optional[exp.Expression]: 5026 if ( 5027 (any_token and self._advance_any()) 5028 or self._match(TokenType.VAR) 5029 or (self._match_set(tokens) if tokens else False) 5030 ): 5031 return self.expression( 5032 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5033 ) 5034 return 
self._parse_placeholder() 5035 5036 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5037 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5038 self._advance() 5039 return self._prev 5040 return None 5041 5042 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5043 return self._parse_var() or self._parse_string() 5044 5045 def _parse_null(self) -> t.Optional[exp.Expression]: 5046 if self._match_set(self.NULL_TOKENS): 5047 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5048 return self._parse_placeholder() 5049 5050 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5051 if self._match(TokenType.TRUE): 5052 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5053 if self._match(TokenType.FALSE): 5054 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5055 return self._parse_placeholder() 5056 5057 def _parse_star(self) -> t.Optional[exp.Expression]: 5058 if self._match(TokenType.STAR): 5059 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5060 return self._parse_placeholder() 5061 5062 def _parse_parameter(self) -> exp.Parameter: 5063 def _parse_parameter_part() -> t.Optional[exp.Expression]: 5064 return ( 5065 self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True) 5066 ) 5067 5068 self._match(TokenType.L_BRACE) 5069 this = _parse_parameter_part() 5070 expression = self._match(TokenType.COLON) and _parse_parameter_part() 5071 self._match(TokenType.R_BRACE) 5072 5073 return self.expression(exp.Parameter, this=this, expression=expression) 5074 5075 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5076 if self._match_set(self.PLACEHOLDER_PARSERS): 5077 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5078 if placeholder: 5079 return placeholder 5080 self._advance(-1) 5081 return None 5082 5083 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5084 if not self._match(TokenType.EXCEPT): 5085 return None 5086 if self._match(TokenType.L_PAREN, advance=False): 5087 return self._parse_wrapped_csv(self._parse_column) 5088 5089 except_column = self._parse_column() 5090 return [except_column] if except_column else None 5091 5092 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5093 if not self._match(TokenType.REPLACE): 5094 return None 5095 if self._match(TokenType.L_PAREN, advance=False): 5096 return self._parse_wrapped_csv(self._parse_expression) 5097 5098 replace_expression = self._parse_expression() 5099 return [replace_expression] if replace_expression else None 5100 5101 def _parse_csv( 5102 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5103 ) -> t.List[exp.Expression]: 5104 parse_result = parse_method() 5105 items = [parse_result] if parse_result is not None else [] 5106 5107 while self._match(sep): 5108 self._add_comments(parse_result) 5109 parse_result = parse_method() 5110 if parse_result is not None: 5111 items.append(parse_result) 5112 5113 return items 5114 5115 def _parse_tokens( 5116 self, parse_method: t.Callable, expressions: t.Dict 5117 ) -> t.Optional[exp.Expression]: 5118 this = parse_method() 5119 5120 while self._match_set(expressions): 5121 this = self.expression( 5122 expressions[self._prev.token_type], 5123 this=this, 5124 comments=self._prev_comments, 5125 expression=parse_method(), 5126 ) 5127 5128 return this 5129 5130 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5131 return 
self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5132 5133 def _parse_wrapped_csv( 5134 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5135 ) -> t.List[exp.Expression]: 5136 return self._parse_wrapped( 5137 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5138 ) 5139 5140 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5141 wrapped = self._match(TokenType.L_PAREN) 5142 if not wrapped and not optional: 5143 self.raise_error("Expecting (") 5144 parse_result = parse_method() 5145 if wrapped: 5146 self._match_r_paren() 5147 return parse_result 5148 5149 def _parse_expressions(self) -> t.List[exp.Expression]: 5150 return self._parse_csv(self._parse_expression) 5151 5152 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5153 return self._parse_select() or self._parse_set_operations( 5154 self._parse_expression() if alias else self._parse_conjunction() 5155 ) 5156 5157 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5158 return self._parse_query_modifiers( 5159 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5160 ) 5161 5162 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5163 this = None 5164 if self._match_texts(self.TRANSACTION_KIND): 5165 this = self._prev.text 5166 5167 self._match_texts(("TRANSACTION", "WORK")) 5168 5169 modes = [] 5170 while True: 5171 mode = [] 5172 while self._match(TokenType.VAR): 5173 mode.append(self._prev.text) 5174 5175 if mode: 5176 modes.append(" ".join(mode)) 5177 if not self._match(TokenType.COMMA): 5178 break 5179 5180 return self.expression(exp.Transaction, this=this, modes=modes) 5181 5182 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5183 chain = None 5184 savepoint = None 5185 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5186 5187 self._match_texts(("TRANSACTION", "WORK")) 5188 5189 if self._match_text_seq("TO"): 5190 self._match_text_seq("SAVEPOINT") 5191 savepoint = self._parse_id_var() 5192 5193 if self._match(TokenType.AND): 5194 chain = not self._match_text_seq("NO") 5195 self._match_text_seq("CHAIN") 5196 5197 if is_rollback: 5198 return self.expression(exp.Rollback, savepoint=savepoint) 5199 5200 return self.expression(exp.Commit, chain=chain) 5201 5202 def _parse_refresh(self) -> exp.Refresh: 5203 self._match(TokenType.TABLE) 5204 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5205 5206 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5207 if not self._match_text_seq("ADD"): 5208 return None 5209 5210 self._match(TokenType.COLUMN) 5211 exists_column = self._parse_exists(not_=True) 5212 expression = self._parse_field_def() 5213 5214 if expression: 5215 expression.set("exists", exists_column) 5216 5217 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5218 if self._match_texts(("FIRST", "AFTER")): 5219 position = self._prev.text 5220 column_position = self.expression( 5221 exp.ColumnPosition, this=self._parse_column(), position=position 5222 ) 5223 expression.set("position", column_position) 5224 5225 return expression 5226 5227 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5228 drop = self._match(TokenType.DROP) and self._parse_drop() 5229 if drop and not isinstance(drop, exp.Command): 5230 drop.set("kind", drop.args.get("kind", "COLUMN")) 5231 return drop 5232 5233 # 
https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5234 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5235 return self.expression( 5236 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5237 ) 5238 5239 def _parse_add_constraint(self) -> exp.AddConstraint: 5240 this = None 5241 kind = self._prev.token_type 5242 5243 if kind == TokenType.CONSTRAINT: 5244 this = self._parse_id_var() 5245 5246 if self._match_text_seq("CHECK"): 5247 expression = self._parse_wrapped(self._parse_conjunction) 5248 enforced = self._match_text_seq("ENFORCED") 5249 5250 return self.expression( 5251 exp.AddConstraint, this=this, expression=expression, enforced=enforced 5252 ) 5253 5254 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 5255 expression = self._parse_foreign_key() 5256 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 5257 expression = self._parse_primary_key() 5258 else: 5259 expression = None 5260 5261 return self.expression(exp.AddConstraint, this=this, expression=expression) 5262 5263 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5264 index = self._index - 1 5265 5266 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 5267 return self._parse_csv(self._parse_add_constraint) 5268 5269 self._retreat(index) 5270 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5271 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5272 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5273 5274 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5275 self._match(TokenType.COLUMN) 5276 column = self._parse_field(any_token=True) 5277 5278 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5279 return self.expression(exp.AlterColumn, this=column, drop=True) 5280 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5281 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5282 5283 self._match_text_seq("SET", "DATA") 5284 return self.expression( 5285 exp.AlterColumn, 5286 this=column, 5287 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5288 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5289 using=self._match(TokenType.USING) and self._parse_conjunction(), 5290 ) 5291 5292 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5293 index = self._index - 1 5294 5295 partition_exists = self._parse_exists() 5296 if self._match(TokenType.PARTITION, advance=False): 5297 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5298 5299 self._retreat(index) 5300 return self._parse_csv(self._parse_drop_column) 5301 5302 def _parse_alter_table_rename(self) -> exp.RenameTable: 5303 self._match_text_seq("TO") 5304 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5305 5306 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5307 start = self._prev 5308 5309 if not self._match(TokenType.TABLE): 5310 return self._parse_as_command(start) 5311 5312 exists = self._parse_exists() 5313 only = self._match_text_seq("ONLY") 5314 this = self._parse_table(schema=True) 5315 5316 if self._next: 5317 self._advance() 5318 5319 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5320 if parser: 5321 actions = ensure_list(parser(self)) 5322 5323 if not self._curr: 5324 return self.expression( 5325 exp.AlterTable, 5326 this=this, 5327 exists=exists, 5328 
actions=actions, 5329 only=only, 5330 ) 5331 5332 return self._parse_as_command(start) 5333 5334 def _parse_merge(self) -> exp.Merge: 5335 self._match(TokenType.INTO) 5336 target = self._parse_table() 5337 5338 if target and self._match(TokenType.ALIAS, advance=False): 5339 target.set("alias", self._parse_table_alias()) 5340 5341 self._match(TokenType.USING) 5342 using = self._parse_table() 5343 5344 self._match(TokenType.ON) 5345 on = self._parse_conjunction() 5346 5347 return self.expression( 5348 exp.Merge, 5349 this=target, 5350 using=using, 5351 on=on, 5352 expressions=self._parse_when_matched(), 5353 ) 5354 5355 def _parse_when_matched(self) -> t.List[exp.When]: 5356 whens = [] 5357 5358 while self._match(TokenType.WHEN): 5359 matched = not self._match(TokenType.NOT) 5360 self._match_text_seq("MATCHED") 5361 source = ( 5362 False 5363 if self._match_text_seq("BY", "TARGET") 5364 else self._match_text_seq("BY", "SOURCE") 5365 ) 5366 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5367 5368 self._match(TokenType.THEN) 5369 5370 if self._match(TokenType.INSERT): 5371 _this = self._parse_star() 5372 if _this: 5373 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5374 else: 5375 then = self.expression( 5376 exp.Insert, 5377 this=self._parse_value(), 5378 expression=self._match(TokenType.VALUES) and self._parse_value(), 5379 ) 5380 elif self._match(TokenType.UPDATE): 5381 expressions = self._parse_star() 5382 if expressions: 5383 then = self.expression(exp.Update, expressions=expressions) 5384 else: 5385 then = self.expression( 5386 exp.Update, 5387 expressions=self._match(TokenType.SET) 5388 and self._parse_csv(self._parse_equality), 5389 ) 5390 elif self._match(TokenType.DELETE): 5391 then = self.expression(exp.Var, this=self._prev.text) 5392 else: 5393 then = None 5394 5395 whens.append( 5396 self.expression( 5397 exp.When, 5398 matched=matched, 5399 source=source, 5400 condition=condition, 5401 then=then, 5402 ) 5403 ) 5404 return whens 5405 5406 def _parse_show(self) -> t.Optional[exp.Expression]: 5407 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5408 if parser: 5409 return parser(self) 5410 return self._parse_as_command(self._prev) 5411 5412 def _parse_set_item_assignment( 5413 self, kind: t.Optional[str] = None 5414 ) -> t.Optional[exp.Expression]: 5415 index = self._index 5416 5417 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5418 return self._parse_set_transaction(global_=kind == "GLOBAL") 5419 5420 left = self._parse_primary() or self._parse_id_var() 5421 assignment_delimiter = self._match_texts(("=", "TO")) 5422 5423 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5424 self._retreat(index) 5425 return None 5426 5427 right = self._parse_statement() or self._parse_id_var() 5428 this = self.expression(exp.EQ, this=left, expression=right) 5429 5430 return self.expression(exp.SetItem, this=this, kind=kind) 5431 5432 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5433 self._match_text_seq("TRANSACTION") 5434 characteristics = self._parse_csv( 5435 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5436 ) 5437 return self.expression( 5438 exp.SetItem, 5439 expressions=characteristics, 5440 kind="TRANSACTION", 5441 **{"global": global_}, # type: ignore 5442 ) 5443 5444 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5445 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5446 return 
parser(self) if parser else self._parse_set_item_assignment(kind=None) 5447 5448 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5449 index = self._index 5450 set_ = self.expression( 5451 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5452 ) 5453 5454 if self._curr: 5455 self._retreat(index) 5456 return self._parse_as_command(self._prev) 5457 5458 return set_ 5459 5460 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5461 for option in options: 5462 if self._match_text_seq(*option.split(" ")): 5463 return exp.var(option) 5464 return None 5465 5466 def _parse_as_command(self, start: Token) -> exp.Command: 5467 while self._curr: 5468 self._advance() 5469 text = self._find_sql(start, self._prev) 5470 size = len(start.text) 5471 return exp.Command(this=text[:size], expression=text[size:]) 5472 5473 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5474 settings = [] 5475 5476 self._match_l_paren() 5477 kind = self._parse_id_var() 5478 5479 if self._match(TokenType.L_PAREN): 5480 while True: 5481 key = self._parse_id_var() 5482 value = self._parse_primary() 5483 5484 if not key and value is None: 5485 break 5486 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5487 self._match(TokenType.R_PAREN) 5488 5489 self._match_r_paren() 5490 5491 return self.expression( 5492 exp.DictProperty, 5493 this=this, 5494 kind=kind.this if kind else None, 5495 settings=settings, 5496 ) 5497 5498 def _parse_dict_range(self, this: str) -> exp.DictRange: 5499 self._match_l_paren() 5500 has_min = self._match_text_seq("MIN") 5501 if has_min: 5502 min = self._parse_var() or self._parse_primary() 5503 self._match_text_seq("MAX") 5504 max = self._parse_var() or self._parse_primary() 5505 else: 5506 max = self._parse_var() or self._parse_primary() 5507 min = exp.Literal.number(0) 5508 self._match_r_paren() 5509 return self.expression(exp.DictRange, this=this, min=min, max=max) 5510 5511 def _parse_comprehension( 5512 self, this: t.Optional[exp.Expression] 5513 ) -> t.Optional[exp.Comprehension]: 5514 index = self._index 5515 expression = self._parse_column() 5516 if not self._match(TokenType.IN): 5517 self._retreat(index - 1) 5518 return None 5519 iterator = self._parse_column() 5520 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5521 return self.expression( 5522 exp.Comprehension, 5523 this=this, 5524 expression=expression, 5525 iterator=iterator, 5526 condition=condition, 5527 ) 5528 5529 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 5530 if self._match(TokenType.HEREDOC_STRING): 5531 return self.expression(exp.Heredoc, this=self._prev.text) 5532 5533 if not self._match_text_seq("$"): 5534 return None 5535 5536 tags = ["$"] 5537 tag_text = None 5538 5539 if self._is_connected(): 5540 self._advance() 5541 tags.append(self._prev.text.upper()) 5542 else: 5543 self.raise_error("No closing $ found") 5544 5545 if tags[-1] != "$": 5546 if self._is_connected() and self._match_text_seq("$"): 5547 tag_text = tags[-1] 5548 tags.append("$") 5549 else: 5550 self.raise_error("No closing $ found") 5551 5552 heredoc_start = self._curr 5553 5554 while self._curr: 5555 if self._match_text_seq(*tags, advance=False): 5556 this = self._find_sql(heredoc_start, self._prev) 5557 self._advance(len(tags)) 5558 return self.expression(exp.Heredoc, this=this, tag=tag_text) 5559 5560 self._advance() 5561 5562 self.raise_error(f"No closing {''.join(tags)} found") 
5563 return None 5564 5565 def _find_parser( 5566 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5567 ) -> t.Optional[t.Callable]: 5568 if not self._curr: 5569 return None 5570 5571 index = self._index 5572 this = [] 5573 while True: 5574 # The current token might be multiple words 5575 curr = self._curr.text.upper() 5576 key = curr.split(" ") 5577 this.append(curr) 5578 5579 self._advance() 5580 result, trie = in_trie(trie, key) 5581 if result == TrieResult.FAILED: 5582 break 5583 5584 if result == TrieResult.EXISTS: 5585 subparser = parsers[" ".join(this)] 5586 return subparser 5587 5588 self._retreat(index) 5589 return None 5590 5591 def _match(self, token_type, advance=True, expression=None): 5592 if not self._curr: 5593 return None 5594 5595 if self._curr.token_type == token_type: 5596 if advance: 5597 self._advance() 5598 self._add_comments(expression) 5599 return True 5600 5601 return None 5602 5603 def _match_set(self, types, advance=True): 5604 if not self._curr: 5605 return None 5606 5607 if self._curr.token_type in types: 5608 if advance: 5609 self._advance() 5610 return True 5611 5612 return None 5613 5614 def _match_pair(self, token_type_a, token_type_b, advance=True): 5615 if not self._curr or not self._next: 5616 return None 5617 5618 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5619 if advance: 5620 self._advance(2) 5621 return True 5622 5623 return None 5624 5625 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5626 if not self._match(TokenType.L_PAREN, expression=expression): 5627 self.raise_error("Expecting (") 5628 5629 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5630 if not self._match(TokenType.R_PAREN, expression=expression): 5631 self.raise_error("Expecting )") 5632 5633 def _match_texts(self, texts, advance=True): 5634 if self._curr and self._curr.text.upper() in texts: 5635 if advance: 5636 self._advance() 5637 return True 5638 return False 5639 5640 def _match_text_seq(self, *texts, advance=True): 5641 index = self._index 5642 for text in texts: 5643 if self._curr and self._curr.text.upper() == text: 5644 self._advance() 5645 else: 5646 self._retreat(index) 5647 return False 5648 5649 if not advance: 5650 self._retreat(index) 5651 5652 return True 5653 5654 @t.overload 5655 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5656 ... 5657 5658 @t.overload 5659 def _replace_columns_with_dots( 5660 self, this: t.Optional[exp.Expression] 5661 ) -> t.Optional[exp.Expression]: 5662 ... 
5663 5664 def _replace_columns_with_dots(self, this): 5665 if isinstance(this, exp.Dot): 5666 exp.replace_children(this, self._replace_columns_with_dots) 5667 elif isinstance(this, exp.Column): 5668 exp.replace_children(this, self._replace_columns_with_dots) 5669 table = this.args.get("table") 5670 this = ( 5671 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5672 ) 5673 5674 return this 5675 5676 def _replace_lambda( 5677 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5678 ) -> t.Optional[exp.Expression]: 5679 if not node: 5680 return node 5681 5682 for column in node.find_all(exp.Column): 5683 if column.parts[0].name in lambda_variables: 5684 dot_or_id = column.to_dot() if column.table else column.this 5685 parent = column.parent 5686 5687 while isinstance(parent, exp.Dot): 5688 if not isinstance(parent.parent, exp.Dot): 5689 parent.replace(dot_or_id) 5690 break 5691 parent = parent.parent 5692 else: 5693 if column is node: 5694 node = dot_or_id 5695 else: 5696 column.replace(dot_or_id) 5697 return node
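The private `_parse_*` methods above are not called directly in normal use; they are exercised through the public entry points. As a rough sketch (using `sqlglot.parse_one`, the top-level helper that wraps `Parser.parse`), the window and MERGE machinery shown above can be reached like this:

    import sqlglot
    from sqlglot import exp

    # _parse_window builds an exp.Window node for the OVER clause.
    select = sqlglot.parse_one("SELECT SUM(x) OVER (PARTITION BY y ORDER BY z) FROM t")
    assert isinstance(select.find(exp.Window), exp.Window)

    # _parse_merge/_parse_when_matched build an exp.Merge with exp.When children.
    merge = sqlglot.parse_one(
        "MERGE INTO t USING s ON t.id = s.id "
        "WHEN MATCHED THEN UPDATE SET t.v = s.v "
        "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
    )
    assert isinstance(merge, exp.Merge)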
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
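Most callers never construct a Parser themselves; `sqlglot.parse` and `sqlglot.parse_one` build the tokenizer and parser for the chosen dialect internally. A minimal sketch of constructing one directly:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    # dialect accepts a name, a Dialect class, or an instance; it is
    # resolved by Dialect.get_or_raise in __init__.
    parser = Parser(
        error_level=ErrorLevel.WARN,  # log errors instead of raising immediately
        error_message_context=50,     # show 50 characters of context around errors
        max_errors=5,
        dialect="duckdb",
    )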
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
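For example, feeding the tokens of a multi-statement string through `parse` yields one tree per statement (a sketch using the default tokenizer and dialect):

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1; SELECT 2"
    trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)
    assert len(trees) == 2  # one syntax tree per SQL statement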
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
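`parse_into` is also what powers `sqlglot.parse_one(..., into=...)`; a short sketch parsing a dotted name directly as a table reference:

    import sqlglot
    from sqlglot import exp

    # Routed through parse_into with expression_types=exp.Table.
    table = sqlglot.parse_one("db.schema.tbl", into=exp.Table)
    assert isinstance(table, exp.Table)
    assert table.name == "tbl"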
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
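With ErrorLevel.WARN, for example, a malformed statement does not raise: errors accumulate on `parser.errors`, and `check_errors` (invoked at the end of parsing) logs them. A sketch, assuming the truncated statement below trips `raise_error`:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT * FROM"  # incomplete: no table after FROM
    parser = Parser(error_level=ErrorLevel.WARN)
    parser.parse(Tokenizer().tokenize(sql), sql=sql)
    print(parser.errors)  # collected ParseError objects, already logged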
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
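Under the default ErrorLevel.IMMEDIATE the error is raised right away, and the raised ParseError carries the structured context assembled here. A minimal sketch:

    from sqlglot.errors import ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT * FROM (SELECT 1"  # unbalanced parenthesis
    try:
        Parser().parse(Tokenizer().tokenize(sql), sql=sql)
    except ParseError as e:
        first = e.errors[0]
        print(first["line"], first["col"], first["highlight"])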
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
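This helper appears throughout the source above as `self.expression(...)`: it instantiates the node, attaches any pending comments, and validates mandatory arguments in one step. A small sketch building a node by hand:

    from sqlglot import exp
    from sqlglot.parser import Parser

    parser = Parser()
    # exp.EQ requires both `this` and `expression`, so this validates cleanly.
    eq = parser.expression(exp.EQ, this=exp.column("a"), expression=exp.Literal.number(1))
    assert eq.sql() == "a = 1"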
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
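If a mandatory argument is missing, each message from `expression.error_messages(args)` is routed through `raise_error`, so under the default ErrorLevel.IMMEDIATE the first one raises. A sketch:

    from sqlglot import exp
    from sqlglot.errors import ParseError
    from sqlglot.parser import Parser

    incomplete = exp.EQ(this=exp.column("a"))  # the mandatory `expression` arg is missing

    try:
        Parser().validate_expression(incomplete)
    except ParseError as e:
        print(e)  # reports the required keyword that was not set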