# sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E, Lit 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18logger = logging.getLogger("sqlglot") 19 20 21def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 22 if len(args) == 1 and args[0].is_star: 23 return exp.StarMap(this=args[0]) 24 25 keys = [] 26 values = [] 27 for i in range(0, len(args), 2): 28 keys.append(args[i]) 29 values.append(args[i + 1]) 30 31 return exp.VarMap( 32 keys=exp.Array(expressions=keys), 33 values=exp.Array(expressions=values), 34 ) 35 36 37def parse_like(args: t.List) -> exp.Escape | exp.Like: 38 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 39 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 40 41 42def binary_range_parser( 43 expr_type: t.Type[exp.Expression], 44) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 45 return lambda self, this: self._parse_escape( 46 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 47 ) 48 49 50def parse_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 51 # Default argument order is base, expression 52 this = seq_get(args, 0) 53 expression = seq_get(args, 1) 54 55 if expression: 56 if not dialect.LOG_BASE_FIRST: 57 this, expression = expression, this 58 return exp.Log(this=this, expression=expression) 59 60 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 61 62 63class _Parser(type): 64 def __new__(cls, clsname, bases, attrs): 65 klass = 
super().__new__(cls, clsname, bases, attrs) 66 67 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 68 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 69 70 return klass 71 72 73class Parser(metaclass=_Parser): 74 """ 75 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 76 77 Args: 78 error_level: The desired error level. 79 Default: ErrorLevel.IMMEDIATE 80 error_message_context: Determines the amount of context to capture from a 81 query string when displaying the error message (in number of characters). 82 Default: 100 83 max_errors: Maximum number of error messages to include in a raised ParseError. 84 This is only relevant if error_level is ErrorLevel.RAISE. 85 Default: 3 86 """ 87 88 FUNCTIONS: t.Dict[str, t.Callable] = { 89 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 90 "CONCAT": lambda args, dialect: exp.Concat( 91 expressions=args, 92 safe=not dialect.STRICT_STRING_CONCAT, 93 coalesce=dialect.CONCAT_COALESCE, 94 ), 95 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 96 expressions=args, 97 safe=not dialect.STRICT_STRING_CONCAT, 98 coalesce=dialect.CONCAT_COALESCE, 99 ), 100 "DATE_TO_DATE_STR": lambda args: exp.Cast( 101 this=seq_get(args, 0), 102 to=exp.DataType(this=exp.DataType.Type.TEXT), 103 ), 104 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 105 "LIKE": parse_like, 106 "LOG": parse_logarithm, 107 "TIME_TO_TIME_STR": lambda args: exp.Cast( 108 this=seq_get(args, 0), 109 to=exp.DataType(this=exp.DataType.Type.TEXT), 110 ), 111 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 112 this=exp.Cast( 113 this=seq_get(args, 0), 114 to=exp.DataType(this=exp.DataType.Type.TEXT), 115 ), 116 start=exp.Literal.number(1), 117 length=exp.Literal.number(10), 118 ), 119 "VAR_MAP": parse_var_map, 120 } 121 122 NO_PAREN_FUNCTIONS = { 123 TokenType.CURRENT_DATE: exp.CurrentDate, 124 TokenType.CURRENT_DATETIME: 
exp.CurrentDate, 125 TokenType.CURRENT_TIME: exp.CurrentTime, 126 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 127 TokenType.CURRENT_USER: exp.CurrentUser, 128 } 129 130 STRUCT_TYPE_TOKENS = { 131 TokenType.NESTED, 132 TokenType.STRUCT, 133 } 134 135 NESTED_TYPE_TOKENS = { 136 TokenType.ARRAY, 137 TokenType.LOWCARDINALITY, 138 TokenType.MAP, 139 TokenType.NULLABLE, 140 *STRUCT_TYPE_TOKENS, 141 } 142 143 ENUM_TYPE_TOKENS = { 144 TokenType.ENUM, 145 TokenType.ENUM8, 146 TokenType.ENUM16, 147 } 148 149 AGGREGATE_TYPE_TOKENS = { 150 TokenType.AGGREGATEFUNCTION, 151 TokenType.SIMPLEAGGREGATEFUNCTION, 152 } 153 154 TYPE_TOKENS = { 155 TokenType.BIT, 156 TokenType.BOOLEAN, 157 TokenType.TINYINT, 158 TokenType.UTINYINT, 159 TokenType.SMALLINT, 160 TokenType.USMALLINT, 161 TokenType.INT, 162 TokenType.UINT, 163 TokenType.BIGINT, 164 TokenType.UBIGINT, 165 TokenType.INT128, 166 TokenType.UINT128, 167 TokenType.INT256, 168 TokenType.UINT256, 169 TokenType.MEDIUMINT, 170 TokenType.UMEDIUMINT, 171 TokenType.FIXEDSTRING, 172 TokenType.FLOAT, 173 TokenType.DOUBLE, 174 TokenType.CHAR, 175 TokenType.NCHAR, 176 TokenType.VARCHAR, 177 TokenType.NVARCHAR, 178 TokenType.TEXT, 179 TokenType.MEDIUMTEXT, 180 TokenType.LONGTEXT, 181 TokenType.MEDIUMBLOB, 182 TokenType.LONGBLOB, 183 TokenType.BINARY, 184 TokenType.VARBINARY, 185 TokenType.JSON, 186 TokenType.JSONB, 187 TokenType.INTERVAL, 188 TokenType.TINYBLOB, 189 TokenType.TINYTEXT, 190 TokenType.TIME, 191 TokenType.TIMETZ, 192 TokenType.TIMESTAMP, 193 TokenType.TIMESTAMP_S, 194 TokenType.TIMESTAMP_MS, 195 TokenType.TIMESTAMP_NS, 196 TokenType.TIMESTAMPTZ, 197 TokenType.TIMESTAMPLTZ, 198 TokenType.DATETIME, 199 TokenType.DATETIME64, 200 TokenType.DATE, 201 TokenType.DATE32, 202 TokenType.INT4RANGE, 203 TokenType.INT4MULTIRANGE, 204 TokenType.INT8RANGE, 205 TokenType.INT8MULTIRANGE, 206 TokenType.NUMRANGE, 207 TokenType.NUMMULTIRANGE, 208 TokenType.TSRANGE, 209 TokenType.TSMULTIRANGE, 210 TokenType.TSTZRANGE, 211 
TokenType.TSTZMULTIRANGE, 212 TokenType.DATERANGE, 213 TokenType.DATEMULTIRANGE, 214 TokenType.DECIMAL, 215 TokenType.UDECIMAL, 216 TokenType.BIGDECIMAL, 217 TokenType.UUID, 218 TokenType.GEOGRAPHY, 219 TokenType.GEOMETRY, 220 TokenType.HLLSKETCH, 221 TokenType.HSTORE, 222 TokenType.PSEUDO_TYPE, 223 TokenType.SUPER, 224 TokenType.SERIAL, 225 TokenType.SMALLSERIAL, 226 TokenType.BIGSERIAL, 227 TokenType.XML, 228 TokenType.YEAR, 229 TokenType.UNIQUEIDENTIFIER, 230 TokenType.USERDEFINED, 231 TokenType.MONEY, 232 TokenType.SMALLMONEY, 233 TokenType.ROWVERSION, 234 TokenType.IMAGE, 235 TokenType.VARIANT, 236 TokenType.OBJECT, 237 TokenType.OBJECT_IDENTIFIER, 238 TokenType.INET, 239 TokenType.IPADDRESS, 240 TokenType.IPPREFIX, 241 TokenType.IPV4, 242 TokenType.IPV6, 243 TokenType.UNKNOWN, 244 TokenType.NULL, 245 *ENUM_TYPE_TOKENS, 246 *NESTED_TYPE_TOKENS, 247 *AGGREGATE_TYPE_TOKENS, 248 } 249 250 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 251 TokenType.BIGINT: TokenType.UBIGINT, 252 TokenType.INT: TokenType.UINT, 253 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 254 TokenType.SMALLINT: TokenType.USMALLINT, 255 TokenType.TINYINT: TokenType.UTINYINT, 256 TokenType.DECIMAL: TokenType.UDECIMAL, 257 } 258 259 SUBQUERY_PREDICATES = { 260 TokenType.ANY: exp.Any, 261 TokenType.ALL: exp.All, 262 TokenType.EXISTS: exp.Exists, 263 TokenType.SOME: exp.Any, 264 } 265 266 RESERVED_TOKENS = { 267 *Tokenizer.SINGLE_TOKENS.values(), 268 TokenType.SELECT, 269 } 270 271 DB_CREATABLES = { 272 TokenType.DATABASE, 273 TokenType.SCHEMA, 274 TokenType.TABLE, 275 TokenType.VIEW, 276 TokenType.MODEL, 277 TokenType.DICTIONARY, 278 } 279 280 CREATABLES = { 281 TokenType.COLUMN, 282 TokenType.CONSTRAINT, 283 TokenType.FUNCTION, 284 TokenType.INDEX, 285 TokenType.PROCEDURE, 286 TokenType.FOREIGN_KEY, 287 *DB_CREATABLES, 288 } 289 290 # Tokens that can represent identifiers 291 ID_VAR_TOKENS = { 292 TokenType.VAR, 293 TokenType.ANTI, 294 TokenType.APPLY, 295 TokenType.ASC, 296 TokenType.AUTO_INCREMENT, 297 
TokenType.BEGIN, 298 TokenType.CACHE, 299 TokenType.CASE, 300 TokenType.COLLATE, 301 TokenType.COMMAND, 302 TokenType.COMMENT, 303 TokenType.COMMIT, 304 TokenType.CONSTRAINT, 305 TokenType.DEFAULT, 306 TokenType.DELETE, 307 TokenType.DESC, 308 TokenType.DESCRIBE, 309 TokenType.DICTIONARY, 310 TokenType.DIV, 311 TokenType.END, 312 TokenType.EXECUTE, 313 TokenType.ESCAPE, 314 TokenType.FALSE, 315 TokenType.FIRST, 316 TokenType.FILTER, 317 TokenType.FINAL, 318 TokenType.FORMAT, 319 TokenType.FULL, 320 TokenType.IS, 321 TokenType.ISNULL, 322 TokenType.INTERVAL, 323 TokenType.KEEP, 324 TokenType.KILL, 325 TokenType.LEFT, 326 TokenType.LOAD, 327 TokenType.MERGE, 328 TokenType.NATURAL, 329 TokenType.NEXT, 330 TokenType.OFFSET, 331 TokenType.OPERATOR, 332 TokenType.ORDINALITY, 333 TokenType.OVERLAPS, 334 TokenType.OVERWRITE, 335 TokenType.PARTITION, 336 TokenType.PERCENT, 337 TokenType.PIVOT, 338 TokenType.PRAGMA, 339 TokenType.RANGE, 340 TokenType.RECURSIVE, 341 TokenType.REFERENCES, 342 TokenType.REFRESH, 343 TokenType.REPLACE, 344 TokenType.RIGHT, 345 TokenType.ROW, 346 TokenType.ROWS, 347 TokenType.SEMI, 348 TokenType.SET, 349 TokenType.SETTINGS, 350 TokenType.SHOW, 351 TokenType.TEMPORARY, 352 TokenType.TOP, 353 TokenType.TRUE, 354 TokenType.UNIQUE, 355 TokenType.UNPIVOT, 356 TokenType.UPDATE, 357 TokenType.USE, 358 TokenType.VOLATILE, 359 TokenType.WINDOW, 360 *CREATABLES, 361 *SUBQUERY_PREDICATES, 362 *TYPE_TOKENS, 363 *NO_PAREN_FUNCTIONS, 364 } 365 366 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 367 368 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 369 TokenType.ANTI, 370 TokenType.APPLY, 371 TokenType.ASOF, 372 TokenType.FULL, 373 TokenType.LEFT, 374 TokenType.LOCK, 375 TokenType.NATURAL, 376 TokenType.OFFSET, 377 TokenType.RIGHT, 378 TokenType.SEMI, 379 TokenType.WINDOW, 380 } 381 382 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 383 384 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 385 386 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 
    # Tokens that may start a function call when followed by parentheses.
    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # Binary-operator precedence tables (token -> AST node), from lowest to
    # highest binding strength: CONJUNCTION, EQUALITY, COMPARISON, BITWISE,
    # TERM, FACTOR, EXPONENT.
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.COLON_EQ: exp.PropertyEQ,
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    # Empty by default; dialects with an exponent operator (e.g. ** or ^) fill this in.
    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    # Lambda syntaxes: `x -> expr` (Lambda) and `x => expr` (Kwarg).
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Postfix operators applied to a column expression (cast, JSON extraction, ...).
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Entry points used by parse_into: target expression type -> parse method.
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Top-level statement dispatch: leading token -> statement parser.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal/primary-expression dispatch: token -> (self, token) -> node.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    # Range/predicate operators (BETWEEN, IN, IS, LIKE-family, ...).
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    # DDL property keyword -> property parser (used e.g. in CREATE ... WITH (...)).
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column-constraint keyword -> constraint parser (CREATE TABLE column defs).
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    # ALTER TABLE action keyword -> parser.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"}

    # Functions whose arguments are not parenthesized (e.g. CASE ... END).
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Functions needing custom parsing of their argument lists.
    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    # Query-modifier token -> (modifier arg name, parsed node) producers.
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    # SET statement scope keyword -> parser.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }
    # Empty by default; dialects with SHOW statements populate this table.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}
    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    # Dialect behavior flags, overridden by subclasses.
    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether or not the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether or not string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # parses no parenthesis if statements as commands
    NO_PAREN_IF_COMMANDS = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        # Local import to avoid a circular dependency with sqlglot.dialects.
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        """Clear all per-parse state so the instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Remember which target type this attempt was for, then try the next one.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        # Split the token stream into one chunk per statement, on semicolons.
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
else: 1104 chunks[-1].append(token) 1105 1106 expressions = [] 1107 1108 for tokens in chunks: 1109 self._index = -1 1110 self._tokens = tokens 1111 self._advance() 1112 1113 expressions.append(parse_method(self)) 1114 1115 if self._index < len(self._tokens): 1116 self.raise_error("Invalid expression / Unexpected token") 1117 1118 self.check_errors() 1119 1120 return expressions 1121 1122 def check_errors(self) -> None: 1123 """Logs or raises any found errors, depending on the chosen error level setting.""" 1124 if self.error_level == ErrorLevel.WARN: 1125 for error in self.errors: 1126 logger.error(str(error)) 1127 elif self.error_level == ErrorLevel.RAISE and self.errors: 1128 raise ParseError( 1129 concat_messages(self.errors, self.max_errors), 1130 errors=merge_errors(self.errors), 1131 ) 1132 1133 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1134 """ 1135 Appends an error in the list of recorded errors or raises it, depending on the chosen 1136 error level setting. 1137 """ 1138 token = token or self._curr or self._prev or Token.string("") 1139 start = token.start 1140 end = token.end + 1 1141 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1142 highlight = self.sql[start:end] 1143 end_context = self.sql[end : end + self.error_message_context] 1144 1145 error = ParseError.new( 1146 f"{message}. Line {token.line}, Col: {token.col}.\n" 1147 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1148 description=message, 1149 line=token.line, 1150 col=token.col, 1151 start_context=start_context, 1152 highlight=highlight, 1153 end_context=end_context, 1154 ) 1155 1156 if self.error_level == ErrorLevel.IMMEDIATE: 1157 raise error 1158 1159 self.errors.append(error) 1160 1161 def expression( 1162 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1163 ) -> E: 1164 """ 1165 Creates a new, validated Expression. 
1166 1167 Args: 1168 exp_class: The expression class to instantiate. 1169 comments: An optional list of comments to attach to the expression. 1170 kwargs: The arguments to set for the expression along with their respective values. 1171 1172 Returns: 1173 The target expression. 1174 """ 1175 instance = exp_class(**kwargs) 1176 instance.add_comments(comments) if comments else self._add_comments(instance) 1177 return self.validate_expression(instance) 1178 1179 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1180 if expression and self._prev_comments: 1181 expression.add_comments(self._prev_comments) 1182 self._prev_comments = None 1183 1184 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1185 """ 1186 Validates an Expression, making sure that all its mandatory arguments are set. 1187 1188 Args: 1189 expression: The expression to validate. 1190 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1191 1192 Returns: 1193 The validated expression. 
1194 """ 1195 if self.error_level != ErrorLevel.IGNORE: 1196 for error_message in expression.error_messages(args): 1197 self.raise_error(error_message) 1198 1199 return expression 1200 1201 def _find_sql(self, start: Token, end: Token) -> str: 1202 return self.sql[start.start : end.end + 1] 1203 1204 def _is_connected(self) -> bool: 1205 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1206 1207 def _advance(self, times: int = 1) -> None: 1208 self._index += times 1209 self._curr = seq_get(self._tokens, self._index) 1210 self._next = seq_get(self._tokens, self._index + 1) 1211 1212 if self._index > 0: 1213 self._prev = self._tokens[self._index - 1] 1214 self._prev_comments = self._prev.comments 1215 else: 1216 self._prev = None 1217 self._prev_comments = None 1218 1219 def _retreat(self, index: int) -> None: 1220 if index != self._index: 1221 self._advance(index - self._index) 1222 1223 def _warn_unsupported(self) -> None: 1224 if len(self._tokens) <= 1: 1225 return 1226 1227 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1228 # interested in emitting a warning for the one being currently processed. 1229 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1230 1231 logger.warning( 1232 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 
1233 ) 1234 1235 def _parse_command(self) -> exp.Command: 1236 self._warn_unsupported() 1237 return self.expression( 1238 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1239 ) 1240 1241 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1242 start = self._prev 1243 exists = self._parse_exists() if allow_exists else None 1244 1245 self._match(TokenType.ON) 1246 1247 kind = self._match_set(self.CREATABLES) and self._prev 1248 if not kind: 1249 return self._parse_as_command(start) 1250 1251 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1252 this = self._parse_user_defined_function(kind=kind.token_type) 1253 elif kind.token_type == TokenType.TABLE: 1254 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1255 elif kind.token_type == TokenType.COLUMN: 1256 this = self._parse_column() 1257 else: 1258 this = self._parse_id_var() 1259 1260 self._match(TokenType.IS) 1261 1262 return self.expression( 1263 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1264 ) 1265 1266 def _parse_to_table( 1267 self, 1268 ) -> exp.ToTableProperty: 1269 table = self._parse_table_parts(schema=True) 1270 return self.expression(exp.ToTableProperty, this=table) 1271 1272 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1273 def _parse_ttl(self) -> exp.Expression: 1274 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1275 this = self._parse_bitwise() 1276 1277 if self._match_text_seq("DELETE"): 1278 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1279 if self._match_text_seq("RECOMPRESS"): 1280 return self.expression( 1281 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1282 ) 1283 if self._match_text_seq("TO", "DISK"): 1284 return self.expression( 1285 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1286 ) 1287 if self._match_text_seq("TO", "VOLUME"): 1288 
return self.expression( 1289 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1290 ) 1291 1292 return this 1293 1294 expressions = self._parse_csv(_parse_ttl_action) 1295 where = self._parse_where() 1296 group = self._parse_group() 1297 1298 aggregates = None 1299 if group and self._match(TokenType.SET): 1300 aggregates = self._parse_csv(self._parse_set_item) 1301 1302 return self.expression( 1303 exp.MergeTreeTTL, 1304 expressions=expressions, 1305 where=where, 1306 group=group, 1307 aggregates=aggregates, 1308 ) 1309 1310 def _parse_statement(self) -> t.Optional[exp.Expression]: 1311 if self._curr is None: 1312 return None 1313 1314 if self._match_set(self.STATEMENT_PARSERS): 1315 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1316 1317 if self._match_set(Tokenizer.COMMANDS): 1318 return self._parse_command() 1319 1320 expression = self._parse_expression() 1321 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1322 return self._parse_query_modifiers(expression) 1323 1324 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1325 start = self._prev 1326 temporary = self._match(TokenType.TEMPORARY) 1327 materialized = self._match_text_seq("MATERIALIZED") 1328 1329 kind = self._match_set(self.CREATABLES) and self._prev.text 1330 if not kind: 1331 return self._parse_as_command(start) 1332 1333 return self.expression( 1334 exp.Drop, 1335 comments=start.comments, 1336 exists=exists or self._parse_exists(), 1337 this=self._parse_table(schema=True), 1338 kind=kind, 1339 temporary=temporary, 1340 materialized=materialized, 1341 cascade=self._match_text_seq("CASCADE"), 1342 constraints=self._match_text_seq("CONSTRAINTS"), 1343 purge=self._match_text_seq("PURGE"), 1344 ) 1345 1346 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1347 return ( 1348 self._match_text_seq("IF") 1349 and (not not_ or self._match(TokenType.NOT)) 1350 and self._match(TokenType.EXISTS) 1351 ) 
    def _parse_create(self) -> exp.Create | exp.Command:
        """
        Parses a CREATE (or REPLACE/OR REPLACE) statement for functions, procedures,
        indexes and the DB_CREATABLES (tables, views, ...). Falls back to a generic
        Command when the target kind cannot be determined or tokens remain unconsumed.
        """
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION: skip the TABLE token so FUNCTION is the creatable kind.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Properties can appear in several positions; accumulate them into one node.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        # Anything left unconsumed means unsupported syntax -> degrade to a Command.
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parses a property that precedes the schema (modifier keywords + a registered parser)."""
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifiers that were actually matched.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The parser doesn't accept these modifiers -> report rather than crash.
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parses a single table/DDL property, via registered parsers or a generic key = value."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic fallback: <key> = <value>; rewind fully if there is no "=".
        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return None

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_column() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parses STORED [AS] <format>, including Hive's INPUTFORMAT/OUTPUTFORMAT pair."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        """Parses an optional "=" or AS followed by a field, wrapping it in `exp_class`."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Parses consecutive properties into an exp.Properties node, or None if there are none."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            # A single parse may yield one property or a list of them.
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        """Parses [NO] FALLBACK [PROTECTION]."""
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """
        Disambiguates VOLATILE: after CREATE/REPLACE/UNIQUE it is a table property,
        otherwise it is treated as a function stability marker.
        """
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        """Parses SYSTEM_VERSIONING = ON [(HISTORY_TABLE = <t> [, DATA_CONSISTENCY_CHECK = <v>])]."""
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parses what follows a WITH keyword in DDL: a property list or one of several clauses."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parses DEFINER = user@host; returns None if either part is missing."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")
1646 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1647 self._match(TokenType.TABLE) 1648 self._match(TokenType.EQ) 1649 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1650 1651 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1652 return self.expression(exp.LogProperty, no=no) 1653 1654 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1655 return self.expression(exp.JournalProperty, **kwargs) 1656 1657 def _parse_checksum(self) -> exp.ChecksumProperty: 1658 self._match(TokenType.EQ) 1659 1660 on = None 1661 if self._match(TokenType.ON): 1662 on = True 1663 elif self._match_text_seq("OFF"): 1664 on = False 1665 1666 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1667 1668 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 1669 return self.expression( 1670 exp.Cluster, 1671 expressions=( 1672 self._parse_wrapped_csv(self._parse_ordered) 1673 if wrapped 1674 else self._parse_csv(self._parse_ordered) 1675 ), 1676 ) 1677 1678 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1679 self._match_text_seq("BY") 1680 1681 self._match_l_paren() 1682 expressions = self._parse_csv(self._parse_column) 1683 self._match_r_paren() 1684 1685 if self._match_text_seq("SORTED", "BY"): 1686 self._match_l_paren() 1687 sorted_by = self._parse_csv(self._parse_ordered) 1688 self._match_r_paren() 1689 else: 1690 sorted_by = None 1691 1692 self._match(TokenType.INTO) 1693 buckets = self._parse_number() 1694 self._match_text_seq("BUCKETS") 1695 1696 return self.expression( 1697 exp.ClusteredByProperty, 1698 expressions=expressions, 1699 sorted_by=sorted_by, 1700 buckets=buckets, 1701 ) 1702 1703 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1704 if not self._match_text_seq("GRANTS"): 1705 self._retreat(self._index - 1) 1706 return None 1707 1708 return self.expression(exp.CopyGrantsProperty) 1709 1710 def _parse_freespace(self) 
-> exp.FreespaceProperty: 1711 self._match(TokenType.EQ) 1712 return self.expression( 1713 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1714 ) 1715 1716 def _parse_mergeblockratio( 1717 self, no: bool = False, default: bool = False 1718 ) -> exp.MergeBlockRatioProperty: 1719 if self._match(TokenType.EQ): 1720 return self.expression( 1721 exp.MergeBlockRatioProperty, 1722 this=self._parse_number(), 1723 percent=self._match(TokenType.PERCENT), 1724 ) 1725 1726 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1727 1728 def _parse_datablocksize( 1729 self, 1730 default: t.Optional[bool] = None, 1731 minimum: t.Optional[bool] = None, 1732 maximum: t.Optional[bool] = None, 1733 ) -> exp.DataBlocksizeProperty: 1734 self._match(TokenType.EQ) 1735 size = self._parse_number() 1736 1737 units = None 1738 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1739 units = self._prev.text 1740 1741 return self.expression( 1742 exp.DataBlocksizeProperty, 1743 size=size, 1744 units=units, 1745 default=default, 1746 minimum=minimum, 1747 maximum=maximum, 1748 ) 1749 1750 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1751 self._match(TokenType.EQ) 1752 always = self._match_text_seq("ALWAYS") 1753 manual = self._match_text_seq("MANUAL") 1754 never = self._match_text_seq("NEVER") 1755 default = self._match_text_seq("DEFAULT") 1756 1757 autotemp = None 1758 if self._match_text_seq("AUTOTEMP"): 1759 autotemp = self._parse_schema() 1760 1761 return self.expression( 1762 exp.BlockCompressionProperty, 1763 always=always, 1764 manual=manual, 1765 never=never, 1766 default=default, 1767 autotemp=autotemp, 1768 ) 1769 1770 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1771 no = self._match_text_seq("NO") 1772 concurrent = self._match_text_seq("CONCURRENT") 1773 self._match_text_seq("ISOLATED", "LOADING") 1774 for_all = self._match_text_seq("FOR", "ALL") 1775 for_insert = 
self._match_text_seq("FOR", "INSERT") 1776 for_none = self._match_text_seq("FOR", "NONE") 1777 return self.expression( 1778 exp.IsolatedLoadingProperty, 1779 no=no, 1780 concurrent=concurrent, 1781 for_all=for_all, 1782 for_insert=for_insert, 1783 for_none=for_none, 1784 ) 1785 1786 def _parse_locking(self) -> exp.LockingProperty: 1787 if self._match(TokenType.TABLE): 1788 kind = "TABLE" 1789 elif self._match(TokenType.VIEW): 1790 kind = "VIEW" 1791 elif self._match(TokenType.ROW): 1792 kind = "ROW" 1793 elif self._match_text_seq("DATABASE"): 1794 kind = "DATABASE" 1795 else: 1796 kind = None 1797 1798 if kind in ("DATABASE", "TABLE", "VIEW"): 1799 this = self._parse_table_parts() 1800 else: 1801 this = None 1802 1803 if self._match(TokenType.FOR): 1804 for_or_in = "FOR" 1805 elif self._match(TokenType.IN): 1806 for_or_in = "IN" 1807 else: 1808 for_or_in = None 1809 1810 if self._match_text_seq("ACCESS"): 1811 lock_type = "ACCESS" 1812 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1813 lock_type = "EXCLUSIVE" 1814 elif self._match_text_seq("SHARE"): 1815 lock_type = "SHARE" 1816 elif self._match_text_seq("READ"): 1817 lock_type = "READ" 1818 elif self._match_text_seq("WRITE"): 1819 lock_type = "WRITE" 1820 elif self._match_text_seq("CHECKSUM"): 1821 lock_type = "CHECKSUM" 1822 else: 1823 lock_type = None 1824 1825 override = self._match_text_seq("OVERRIDE") 1826 1827 return self.expression( 1828 exp.LockingProperty, 1829 this=this, 1830 kind=kind, 1831 for_or_in=for_or_in, 1832 lock_type=lock_type, 1833 override=override, 1834 ) 1835 1836 def _parse_partition_by(self) -> t.List[exp.Expression]: 1837 if self._match(TokenType.PARTITION_BY): 1838 return self._parse_csv(self._parse_conjunction) 1839 return [] 1840 1841 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 1842 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 1843 if self._match_text_seq("MINVALUE"): 1844 return exp.var("MINVALUE") 1845 if self._match_text_seq("MAXVALUE"): 
1846 return exp.var("MAXVALUE") 1847 return self._parse_bitwise() 1848 1849 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 1850 expression = None 1851 from_expressions = None 1852 to_expressions = None 1853 1854 if self._match(TokenType.IN): 1855 this = self._parse_wrapped_csv(self._parse_bitwise) 1856 elif self._match(TokenType.FROM): 1857 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1858 self._match_text_seq("TO") 1859 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1860 elif self._match_text_seq("WITH", "(", "MODULUS"): 1861 this = self._parse_number() 1862 self._match_text_seq(",", "REMAINDER") 1863 expression = self._parse_number() 1864 self._match_r_paren() 1865 else: 1866 self.raise_error("Failed to parse partition bound spec.") 1867 1868 return self.expression( 1869 exp.PartitionBoundSpec, 1870 this=this, 1871 expression=expression, 1872 from_expressions=from_expressions, 1873 to_expressions=to_expressions, 1874 ) 1875 1876 # https://www.postgresql.org/docs/current/sql-createtable.html 1877 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 1878 if not self._match_text_seq("OF"): 1879 self._retreat(self._index - 1) 1880 return None 1881 1882 this = self._parse_table(schema=True) 1883 1884 if self._match(TokenType.DEFAULT): 1885 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 1886 elif self._match_text_seq("FOR", "VALUES"): 1887 expression = self._parse_partition_bound_spec() 1888 else: 1889 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 1890 1891 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 1892 1893 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1894 self._match(TokenType.EQ) 1895 return self.expression( 1896 exp.PartitionedByProperty, 1897 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1898 ) 1899 1900 def _parse_withdata(self, no: bool = False) -> 
exp.WithDataProperty: 1901 if self._match_text_seq("AND", "STATISTICS"): 1902 statistics = True 1903 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1904 statistics = False 1905 else: 1906 statistics = None 1907 1908 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1909 1910 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1911 if self._match_text_seq("SQL"): 1912 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 1913 return None 1914 1915 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1916 if self._match_text_seq("SQL", "DATA"): 1917 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 1918 return None 1919 1920 def _parse_no_property(self) -> t.Optional[exp.Expression]: 1921 if self._match_text_seq("PRIMARY", "INDEX"): 1922 return exp.NoPrimaryIndexProperty() 1923 if self._match_text_seq("SQL"): 1924 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 1925 return None 1926 1927 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1928 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1929 return exp.OnCommitProperty() 1930 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1931 return exp.OnCommitProperty(delete=True) 1932 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1933 1934 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1935 if self._match_text_seq("SQL", "DATA"): 1936 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 1937 return None 1938 1939 def _parse_distkey(self) -> exp.DistKeyProperty: 1940 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1941 1942 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1943 table = self._parse_table(schema=True) 1944 1945 options = [] 1946 while self._match_texts(("INCLUDING", "EXCLUDING")): 1947 this = self._prev.text.upper() 1948 
            # Continuation of the INCLUDING/EXCLUDING loop in _parse_create_like.
            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        # Redshift [COMPOUND] SORTKEY(<ids>).
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        # [DEFAULT] CHARACTER SET [=] <name>; the optional "=" is consumed here.
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        # BigQuery model DDL: REMOTE WITH CONNECTION <table parts>.
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        # RETURNS <type> | RETURNS TABLE [<schema>] | RETURNS TABLE<...> (BigQuery-style).
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        # DESCRIBE [<creatable kind>] [EXTENDED] <table> [<properties>].
        kind = self._match_set(self.CREATABLES) and self._prev.text
        extended = self._match_text_seq("EXTENDED")
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions
        )

    def _parse_insert(self) -> exp.Insert:
        # INSERT statement; the INSERT token itself was consumed by the caller.
        # Handles INSERT OVERWRITE/IGNORE, INSERT [LOCAL] DIRECTORY (Hive),
        # INSERT OR <alternative> (sqlite), ON CONFLICT/DUPLICATE KEY and RETURNING.
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            # RETURNING may legally appear before or after the source expression.
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        # MySQL KILL [CONNECTION | QUERY] <id>.
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        # Postgres ON CONFLICT ... / MySQL ON DUPLICATE KEY UPDATE ...
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        # RETURNING <exprs> [INTO <target>] (the INTO form is used by e.g. Oracle).
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        # ROW was already consumed; expect FORMAT then delegate.
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        # Hive ROW FORMAT SERDE '<class>' [WITH SERDEPROPERTIES (...)] or
        # ROW FORMAT DELIMITED [FIELDS TERMINATED BY ...] [...].
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            # ESCAPED BY is only valid directly after FIELDS TERMINATED BY.
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        # Hive LOAD DATA [LOCAL] INPATH '<path>' [OVERWRITE] INTO TABLE <t> ...;
        # anything else falls back to an opaque Command.
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        # RETURNING may appear before or after WHERE depending on dialect.
        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        # UPDATE <table> SET <eq list> [FROM ...] [WHERE ...] [RETURNING ...] [ORDER ...] [LIMIT ...]
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        # RETURNING may appear before or after the trailing clauses.
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        # Spark UNCACHE TABLE [IF EXISTS] <table>.
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        # Spark CACHE [LAZY] TABLE <t> [OPTIONS('k' = 'v')] [AS <select>].
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        # PARTITION (<expr>, ...).
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        # A single VALUES row: either a parenthesized tuple or a bare expression.
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_expression()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        # The SELECT projection list; split out so dialects can override it.
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        # Core SELECT parser; also handles leading WITH, parenthesized selects,
        # VALUES, and duckdb's bare "FROM x" form.
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            # BigQuery SELECT AS STRUCT / AS VALUE.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        # WITH [RECURSIVE] <cte> [, <cte>]*; tolerates a repeated WITH between CTEs.
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        # <alias> AS (<statement>).
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        # [AS] <name> [(<col>, ...)]; returns None when neither part is present.
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # A "(" that doesn't introduce a column list is rewound, not consumed.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        # Wrap a parsed query in a Subquery node, with optional pivots and alias.
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        # Attach joins, laterals and clause modifiers (WHERE/GROUP/LIMIT/...) to a query node.
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # A LIMIT <offset>, <count> form carries the offset inline;
                            # hoist it into a proper Offset node.
                            offset = expression.args.pop("offset",
None) 2446 if offset: 2447 this.set("offset", exp.Offset(expression=offset)) 2448 continue 2449 break 2450 return this 2451 2452 def _parse_hint(self) -> t.Optional[exp.Hint]: 2453 if self._match(TokenType.HINT): 2454 hints = [] 2455 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2456 hints.extend(hint) 2457 2458 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2459 self.raise_error("Expected */ after HINT") 2460 2461 return self.expression(exp.Hint, expressions=hints) 2462 2463 return None 2464 2465 def _parse_into(self) -> t.Optional[exp.Into]: 2466 if not self._match(TokenType.INTO): 2467 return None 2468 2469 temp = self._match(TokenType.TEMPORARY) 2470 unlogged = self._match_text_seq("UNLOGGED") 2471 self._match(TokenType.TABLE) 2472 2473 return self.expression( 2474 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2475 ) 2476 2477 def _parse_from( 2478 self, joins: bool = False, skip_from_token: bool = False 2479 ) -> t.Optional[exp.From]: 2480 if not skip_from_token and not self._match(TokenType.FROM): 2481 return None 2482 2483 return self.expression( 2484 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2485 ) 2486 2487 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2488 if not self._match(TokenType.MATCH_RECOGNIZE): 2489 return None 2490 2491 self._match_l_paren() 2492 2493 partition = self._parse_partition_by() 2494 order = self._parse_order() 2495 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2496 2497 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2498 rows = exp.var("ONE ROW PER MATCH") 2499 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2500 text = "ALL ROWS PER MATCH" 2501 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2502 text += f" SHOW EMPTY MATCHES" 2503 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2504 text += f" OMIT EMPTY MATCHES" 2505 elif 
self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2506 text += f" WITH UNMATCHED ROWS" 2507 rows = exp.var(text) 2508 else: 2509 rows = None 2510 2511 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2512 text = "AFTER MATCH SKIP" 2513 if self._match_text_seq("PAST", "LAST", "ROW"): 2514 text += f" PAST LAST ROW" 2515 elif self._match_text_seq("TO", "NEXT", "ROW"): 2516 text += f" TO NEXT ROW" 2517 elif self._match_text_seq("TO", "FIRST"): 2518 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2519 elif self._match_text_seq("TO", "LAST"): 2520 text += f" TO LAST {self._advance_any().text}" # type: ignore 2521 after = exp.var(text) 2522 else: 2523 after = None 2524 2525 if self._match_text_seq("PATTERN"): 2526 self._match_l_paren() 2527 2528 if not self._curr: 2529 self.raise_error("Expecting )", self._curr) 2530 2531 paren = 1 2532 start = self._curr 2533 2534 while self._curr and paren > 0: 2535 if self._curr.token_type == TokenType.L_PAREN: 2536 paren += 1 2537 if self._curr.token_type == TokenType.R_PAREN: 2538 paren -= 1 2539 2540 end = self._prev 2541 self._advance() 2542 2543 if paren > 0: 2544 self.raise_error("Expecting )", self._curr) 2545 2546 pattern = exp.var(self._find_sql(start, end)) 2547 else: 2548 pattern = None 2549 2550 define = ( 2551 self._parse_csv(self._parse_name_as_expression) 2552 if self._match_text_seq("DEFINE") 2553 else None 2554 ) 2555 2556 self._match_r_paren() 2557 2558 return self.expression( 2559 exp.MatchRecognize, 2560 partition_by=partition, 2561 order=order, 2562 measures=measures, 2563 rows=rows, 2564 after=after, 2565 pattern=pattern, 2566 define=define, 2567 alias=self._parse_table_alias(), 2568 ) 2569 2570 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2571 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2572 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2573 cross_apply = False 2574 2575 if cross_apply is not None: 2576 this = 
self._parse_select(table=True) 2577 view = None 2578 outer = None 2579 elif self._match(TokenType.LATERAL): 2580 this = self._parse_select(table=True) 2581 view = self._match(TokenType.VIEW) 2582 outer = self._match(TokenType.OUTER) 2583 else: 2584 return None 2585 2586 if not this: 2587 this = ( 2588 self._parse_unnest() 2589 or self._parse_function() 2590 or self._parse_id_var(any_token=False) 2591 ) 2592 2593 while self._match(TokenType.DOT): 2594 this = exp.Dot( 2595 this=this, 2596 expression=self._parse_function() or self._parse_id_var(any_token=False), 2597 ) 2598 2599 if view: 2600 table = self._parse_id_var(any_token=False) 2601 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2602 table_alias: t.Optional[exp.TableAlias] = self.expression( 2603 exp.TableAlias, this=table, columns=columns 2604 ) 2605 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2606 # We move the alias from the lateral's child node to the lateral itself 2607 table_alias = this.args["alias"].pop() 2608 else: 2609 table_alias = self._parse_table_alias() 2610 2611 return self.expression( 2612 exp.Lateral, 2613 this=this, 2614 view=view, 2615 outer=outer, 2616 alias=table_alias, 2617 cross_apply=cross_apply, 2618 ) 2619 2620 def _parse_join_parts( 2621 self, 2622 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2623 return ( 2624 self._match_set(self.JOIN_METHODS) and self._prev, 2625 self._match_set(self.JOIN_SIDES) and self._prev, 2626 self._match_set(self.JOIN_KINDS) and self._prev, 2627 ) 2628 2629 def _parse_join( 2630 self, skip_join_token: bool = False, parse_bracket: bool = False 2631 ) -> t.Optional[exp.Join]: 2632 if self._match(TokenType.COMMA): 2633 return self.expression(exp.Join, this=self._parse_table()) 2634 2635 index = self._index 2636 method, side, kind = self._parse_join_parts() 2637 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2638 join = self._match(TokenType.JOIN) 
2639 2640 if not skip_join_token and not join: 2641 self._retreat(index) 2642 kind = None 2643 method = None 2644 side = None 2645 2646 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2647 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2648 2649 if not skip_join_token and not join and not outer_apply and not cross_apply: 2650 return None 2651 2652 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2653 2654 if method: 2655 kwargs["method"] = method.text 2656 if side: 2657 kwargs["side"] = side.text 2658 if kind: 2659 kwargs["kind"] = kind.text 2660 if hint: 2661 kwargs["hint"] = hint 2662 2663 if self._match(TokenType.ON): 2664 kwargs["on"] = self._parse_conjunction() 2665 elif self._match(TokenType.USING): 2666 kwargs["using"] = self._parse_wrapped_id_vars() 2667 elif not (kind and kind.token_type == TokenType.CROSS): 2668 index = self._index 2669 join = self._parse_join() 2670 2671 if join and self._match(TokenType.ON): 2672 kwargs["on"] = self._parse_conjunction() 2673 elif join and self._match(TokenType.USING): 2674 kwargs["using"] = self._parse_wrapped_id_vars() 2675 else: 2676 join = None 2677 self._retreat(index) 2678 2679 kwargs["this"].set("joins", [join] if join else None) 2680 2681 comments = [c for token in (method, side, kind) if token for c in token.comments] 2682 return self.expression(exp.Join, comments=comments, **kwargs) 2683 2684 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2685 this = self._parse_conjunction() 2686 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2687 return this 2688 2689 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2690 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2691 2692 return this 2693 2694 def _parse_index( 2695 self, 2696 index: t.Optional[exp.Expression] = None, 2697 ) -> t.Optional[exp.Index]: 2698 if index: 2699 unique = None 2700 primary = None 
2701 amp = None 2702 2703 self._match(TokenType.ON) 2704 self._match(TokenType.TABLE) # hive 2705 table = self._parse_table_parts(schema=True) 2706 else: 2707 unique = self._match(TokenType.UNIQUE) 2708 primary = self._match_text_seq("PRIMARY") 2709 amp = self._match_text_seq("AMP") 2710 2711 if not self._match(TokenType.INDEX): 2712 return None 2713 2714 index = self._parse_id_var() 2715 table = None 2716 2717 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2718 2719 if self._match(TokenType.L_PAREN, advance=False): 2720 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2721 else: 2722 columns = None 2723 2724 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 2725 2726 return self.expression( 2727 exp.Index, 2728 this=index, 2729 table=table, 2730 using=using, 2731 columns=columns, 2732 unique=unique, 2733 primary=primary, 2734 amp=amp, 2735 include=include, 2736 partition_by=self._parse_partition_by(), 2737 where=self._parse_where(), 2738 ) 2739 2740 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2741 hints: t.List[exp.Expression] = [] 2742 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2743 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2744 hints.append( 2745 self.expression( 2746 exp.WithTableHint, 2747 expressions=self._parse_csv( 2748 lambda: self._parse_function() or self._parse_var(any_token=True) 2749 ), 2750 ) 2751 ) 2752 self._match_r_paren() 2753 else: 2754 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2755 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2756 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2757 2758 self._match_texts(("INDEX", "KEY")) 2759 if self._match(TokenType.FOR): 2760 hint.set("target", self._advance_any() and self._prev.text.upper()) 2761 2762 hint.set("expressions", self._parse_wrapped_id_vars()) 2763 
hints.append(hint) 2764 2765 return hints or None 2766 2767 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2768 return ( 2769 (not schema and self._parse_function(optional_parens=False)) 2770 or self._parse_id_var(any_token=False) 2771 or self._parse_string_as_identifier() 2772 or self._parse_placeholder() 2773 ) 2774 2775 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2776 catalog = None 2777 db = None 2778 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 2779 2780 while self._match(TokenType.DOT): 2781 if catalog: 2782 # This allows nesting the table in arbitrarily many dot expressions if needed 2783 table = self.expression( 2784 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2785 ) 2786 else: 2787 catalog = db 2788 db = table 2789 table = self._parse_table_part(schema=schema) or "" 2790 2791 if not table: 2792 self.raise_error(f"Expected table name but got {self._curr}") 2793 2794 return self.expression( 2795 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2796 ) 2797 2798 def _parse_table( 2799 self, 2800 schema: bool = False, 2801 joins: bool = False, 2802 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2803 parse_bracket: bool = False, 2804 ) -> t.Optional[exp.Expression]: 2805 lateral = self._parse_lateral() 2806 if lateral: 2807 return lateral 2808 2809 unnest = self._parse_unnest() 2810 if unnest: 2811 return unnest 2812 2813 values = self._parse_derived_table_values() 2814 if values: 2815 return values 2816 2817 subquery = self._parse_select(table=True) 2818 if subquery: 2819 if not subquery.args.get("pivots"): 2820 subquery.set("pivots", self._parse_pivots()) 2821 return subquery 2822 2823 bracket = parse_bracket and self._parse_bracket(None) 2824 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2825 this = t.cast( 2826 exp.Expression, bracket or 
self._parse_bracket(self._parse_table_parts(schema=schema)) 2827 ) 2828 2829 if schema: 2830 return self._parse_schema(this=this) 2831 2832 version = self._parse_version() 2833 2834 if version: 2835 this.set("version", version) 2836 2837 if self.dialect.ALIAS_POST_TABLESAMPLE: 2838 table_sample = self._parse_table_sample() 2839 2840 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2841 if alias: 2842 this.set("alias", alias) 2843 2844 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 2845 return self.expression( 2846 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 2847 ) 2848 2849 this.set("hints", self._parse_table_hints()) 2850 2851 if not this.args.get("pivots"): 2852 this.set("pivots", self._parse_pivots()) 2853 2854 if not self.dialect.ALIAS_POST_TABLESAMPLE: 2855 table_sample = self._parse_table_sample() 2856 2857 if table_sample: 2858 table_sample.set("this", this) 2859 this = table_sample 2860 2861 if joins: 2862 for join in iter(self._parse_join, None): 2863 this.append("joins", join) 2864 2865 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 2866 this.set("ordinality", True) 2867 this.set("alias", self._parse_table_alias()) 2868 2869 return this 2870 2871 def _parse_version(self) -> t.Optional[exp.Version]: 2872 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2873 this = "TIMESTAMP" 2874 elif self._match(TokenType.VERSION_SNAPSHOT): 2875 this = "VERSION" 2876 else: 2877 return None 2878 2879 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2880 kind = self._prev.text.upper() 2881 start = self._parse_bitwise() 2882 self._match_texts(("TO", "AND")) 2883 end = self._parse_bitwise() 2884 expression: t.Optional[exp.Expression] = self.expression( 2885 exp.Tuple, expressions=[start, end] 2886 ) 2887 elif self._match_text_seq("CONTAINED", "IN"): 2888 kind = "CONTAINED IN" 2889 expression = self.expression( 2890 exp.Tuple, 
expressions=self._parse_wrapped_csv(self._parse_bitwise) 2891 ) 2892 elif self._match(TokenType.ALL): 2893 kind = "ALL" 2894 expression = None 2895 else: 2896 self._match_text_seq("AS", "OF") 2897 kind = "AS OF" 2898 expression = self._parse_type() 2899 2900 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2901 2902 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2903 if not self._match(TokenType.UNNEST): 2904 return None 2905 2906 expressions = self._parse_wrapped_csv(self._parse_equality) 2907 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2908 2909 alias = self._parse_table_alias() if with_alias else None 2910 2911 if alias: 2912 if self.dialect.UNNEST_COLUMN_ONLY: 2913 if alias.args.get("columns"): 2914 self.raise_error("Unexpected extra column alias in unnest.") 2915 2916 alias.set("columns", [alias.this]) 2917 alias.set("this", None) 2918 2919 columns = alias.args.get("columns") or [] 2920 if offset and len(expressions) < len(columns): 2921 offset = columns.pop() 2922 2923 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2924 self._match(TokenType.ALIAS) 2925 offset = self._parse_id_var( 2926 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2927 ) or exp.to_identifier("offset") 2928 2929 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2930 2931 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2932 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2933 if not is_derived and not self._match(TokenType.VALUES): 2934 return None 2935 2936 expressions = self._parse_csv(self._parse_value) 2937 alias = self._parse_table_alias() 2938 2939 if is_derived: 2940 self._match_r_paren() 2941 2942 return self.expression( 2943 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2944 ) 2945 2946 def _parse_table_sample(self, as_modifier: bool = False) -> 
t.Optional[exp.TableSample]: 2947 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2948 as_modifier and self._match_text_seq("USING", "SAMPLE") 2949 ): 2950 return None 2951 2952 bucket_numerator = None 2953 bucket_denominator = None 2954 bucket_field = None 2955 percent = None 2956 size = None 2957 seed = None 2958 2959 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 2960 matched_l_paren = self._match(TokenType.L_PAREN) 2961 2962 if self.TABLESAMPLE_CSV: 2963 num = None 2964 expressions = self._parse_csv(self._parse_primary) 2965 else: 2966 expressions = None 2967 num = ( 2968 self._parse_factor() 2969 if self._match(TokenType.NUMBER, advance=False) 2970 else self._parse_primary() or self._parse_placeholder() 2971 ) 2972 2973 if self._match_text_seq("BUCKET"): 2974 bucket_numerator = self._parse_number() 2975 self._match_text_seq("OUT", "OF") 2976 bucket_denominator = bucket_denominator = self._parse_number() 2977 self._match(TokenType.ON) 2978 bucket_field = self._parse_field() 2979 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2980 percent = num 2981 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 2982 size = num 2983 else: 2984 percent = num 2985 2986 if matched_l_paren: 2987 self._match_r_paren() 2988 2989 if self._match(TokenType.L_PAREN): 2990 method = self._parse_var(upper=True) 2991 seed = self._match(TokenType.COMMA) and self._parse_number() 2992 self._match_r_paren() 2993 elif self._match_texts(("SEED", "REPEATABLE")): 2994 seed = self._parse_wrapped(self._parse_number) 2995 2996 return self.expression( 2997 exp.TableSample, 2998 expressions=expressions, 2999 method=method, 3000 bucket_numerator=bucket_numerator, 3001 bucket_denominator=bucket_denominator, 3002 bucket_field=bucket_field, 3003 percent=percent, 3004 size=size, 3005 seed=seed, 3006 ) 3007 3008 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3009 return list(iter(self._parse_pivot, None)) or None 3010 3011 def 
_parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        # Collect consecutive JOIN clauses until _parse_join yields None.
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        """Parse DuckDB's simplified PIVOT syntax: PIVOT <table> ON ... USING ... GROUP BY ..."""

        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In:
        """Parse the `FOR <col> IN (<expr> [AS alias], ...)` portion of a PIVOT."""

        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_conjunction()

            self._match(TokenType.ALIAS)
            alias = self._parse_field()
            if alias:
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        aliased_expressions = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=aliased_expressions)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a PIVOT/UNPIVOT clause; returns None (after rewinding) if absent."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            # Not actually a pivot — rewind past the PIVOT/UNPIVOT keyword.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names produced by the pivot.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        # Default naming: one column per aggregation alias; dialects may override.
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            # GROUP BY ALL (e.g. DuckDB / Databricks).
            return self.expression(exp.Group, all=True)

        # Keep consuming expressions / GROUPING SETS / ROLLUP / CUBE / TOTALS
        # until no further grouping construct is found.
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # WITH ROLLUP has no parenthesized column list.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    # A stray WITH that didn't belong to us — rewind it.
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        # A grouping set is either a parenthesized tuple of columns or a bare column.
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse Oracle-style START WITH ... CONNECT BY (in either clause order)."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        # Temporarily register PRIOR as a no-paren function while parsing the
        # CONNECT BY condition. NOTE(review): mutates a shared dict and is not
        # exception-safe — an error mid-parse would leave PRIOR registered.
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_name_as_expression(self) -> exp.Alias:
        # Parses `<name> AS <expr>` (alias first), as used by INTERPOLATE.
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, 
expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered:
        """Parse one ORDER BY item: expr [ASC|DESC] [NULLS FIRST|LAST] [WITH FILL ...]."""
        this = parse_method() if parse_method else self._parse_conjunction()

        asc = self._match(TokenType.ASC)
        # NOTE(review): `(asc and False)` always evaluates to a falsy value, so
        # desc is effectively just whether a DESC token was matched.
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When null ordering isn't explicit, derive it from the dialect default.
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            # ClickHouse ORDER BY ... WITH FILL [FROM ...] [TO ...] [STEP ...]
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT [offset,] count, TOP (n), or FETCH FIRST/NEXT n ROWS."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                # TOP may wrap its count in parentheses, in which case a full
                # term is allowed instead of just a number.
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL-style `LIMIT offset, count`.
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing locking clauses: FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE,
        each with optional OF <tables> and NOWAIT / WAIT n / SKIP LOCKED."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait is True for NOWAIT, False for SKIP LOCKED, or a numeric WAIT value.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, 
wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold trailing UNION/EXCEPT/INTERSECT clauses into a left-deep tree."""
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            # DISTINCT is the default unless ALL is given explicitly.
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            expression = this.expression

            if expression:
                # Hoist trailing modifiers (e.g. ORDER BY/LIMIT) from the right
                # arm up onto the union itself.
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    # The following four methods form the precedence-climbing chain:
    # conjunction -> equality -> comparison -> range.
    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, ...), NOT negation,
        and Postgres ISNULL/NOTNULL shorthands, plus trailing IS predicates."""
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # The IS token has already been consumed; rewind to it on failure.
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_conjunction())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of IN: UNNEST(...), a (sub)query or value
        list in parens/brackets, or a bare field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL expression, normalizing toward INTERVAL '<n>' <unit>."""
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            # Bare `interval` identifier followed by IS — not an interval literal.
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1].upper())

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, ||, ??, and << / >> (spelled as LT LT / GT GT)."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            this = self.expression(
                self.FACTOR[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )
            if isinstance(this, exp.Div):
                # Record the dialect's division semantics for transpilation.
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        """Parse intervals, casts written as `type 'literal'`, or fall back to a column."""
        interval = parse_interval and self._parse_interval()
        if interval:
            # Convert INTERVAL 'val_1' unit_1 ... 'val_n' unit_n into a sum of intervals
            while self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
                interval = self.expression(  # type: ignore
                    exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
                )

            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # `type 'literal'` is a cast; dialects may override per-type.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Plain type name with no args — reinterpret it as a column.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested (ARRAY<...>), struct, enum,
        aggregate, timestamp-with-zone, INTERVAL span, and UDT forms.

        Backtracks (via _retreat) whenever the lookahead turns out not to be a
        type after all.
        """
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                # Re-tokenize the identifier to see if it spells a known type.
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        # maybe_func: the tokens so far could also be a function call like CHAR(1).
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                # e.g. ClickHouse AggregateFunction(func, type, ...).
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax: ARRAY<...>, MAP<...>, STRUCT<...>.
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                # e.g. INTERVAL YEAR TO MONTH.
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # A string follows neither — this was a function call, not a type.
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] pairs wrap the type in ARRAY, e.g. INT[][].
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one `name [:] type ...` member of a struct type."""
        index = self._index
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        column_def = self._parse_column_def(this)

        if type_required and (
            (isinstance(this, exp.Column) and this.this is column_def) or this is column_def
        ):
            # _parse_column_def added nothing — what we saw must be a bare type.
            self._retreat(index)
            return self._parse_types()

        return column_def

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators: brackets, :: casts, dots, JSON ops."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the qualifier chain: table -> db -> catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse literals, adjacent-string concatenation, `.N` numbers, and
        parenthesized expressions/subqueries."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # SQL implicitly concatenates adjacent string literals.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = 
self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        # A field is a primary literal, a function call, or an identifier.
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function invocation: no-paren functions, dialect-specific
        parsers, subquery predicates, known functions, or an Anonymous fallback."""
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                # Some builders accept the dialect as a keyword argument.
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Preserve the original spelling of the function name.
                    func.meta["name"] = this

                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one UDF parameter: an identifier optionally followed by a type."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dot-qualified) UDF name plus an optional wrapped parameter list.

        Returns just the name expression when no "(" follows it.
        """
        this = self._parse_id_var()

        # Consume dotted qualifiers, e.g. schema.func
        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. MySQL `_utf8'abc'`); falls back to an identifier
        when no literal follows the introducer token."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as `kind.name`."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            # The part before the dot is the parameter's "kind" (namespace).
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda expression (e.g. `(x, y) -> x + y`), or fall back to an
        ordinary (possibly DISTINCT) expression / subquery argument.

        The cursor is rewound whenever a lambda head fails to materialize, so a plain
        parenthesized expression is not consumed by accident.
        """
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                # Not a parenthesized parameter list after all -- rewind.
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            # Dialect-specific lambda constructor, keyed by the arrow token.
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        # Aggregate arguments may carry IGNORE/RESPECT NULLS, ORDER BY and LIMIT.
        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an optional schema spec `(col [type] [constraints], ...)` attached to `this`.

        First speculatively tries a nested SELECT (e.g. CTAS); any errors from that
        attempt are discarded and the cursor rewound.
        """
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                # Errors accumulated during the speculative parse must not leak.
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse a single schema field: any token as the name, then a column def."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the type / constraint portion of a column definition for `this`.

        Returns `this` unchanged when neither a type nor any constraint follows.
        """
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            # Typeless `col AS expr` -> computed column.
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            # Typed `col type AS (expr)` -> transform column.
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        """Parse AUTO REFRESH <value>; rewinds one token if REFRESH doesn't follow."""
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS constraint with either a wrapped list or a single expression."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY (...) | ROW ... | (expr)}."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            # GENERATED ... AS ROW START|END [HIDDEN] (system-versioned tables).
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (expr): a computed expression, not an identity spec.
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare numeric form: IDENTITY(start, increment).
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse INLINE [LENGTH] <expr>."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        """Parse the constraint following NOT: NULL, CASESPECIFIC or FOR REPLICATION."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named) column constraint via CONSTRAINT_PARSERS."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table constraint; without the CONSTRAINT keyword, only the
        schema-level unnamed constraints are considered."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        # A named constraint can carry several constraint bodies (or function calls).
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Dispatch to the parser registered for the next constraint keyword, if any."""
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] [(cols)] [USING index_type]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON <event> <action>, DEFERRABLE, ...)
        as plain strings, stopping at the first unrecognized token."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON names the event (e.g. DELETE / UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause; `match=False` assumes the keyword was consumed."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        # NOTE(review): `expressions` is always None here; the referenced columns are
        # parsed as part of the table schema below.
        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON DELETE|UPDATE <action>]..."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-token action, e.g. CASCADE / RESTRICT.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        """Parse one element of a PRIMARY KEY column list."""
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        """Parse PERIOD FOR SYSTEM_TIME (start, end); rewinds if the snapshot token is absent."""
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY, as a column constraint or a table-level key with columns."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            # No column list -> column-level constraint.
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        """Parse one bracketed element: an expression with optional alias and slice."""
        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse `[...]` / `{...}` after `this`: subscript, array literal or struct.

        Recurses to consume chained brackets, e.g. `x[0][1]`.
        """
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize subscripts to 0-based using the dialect's index offset.
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse `a : b` inside brackets into a Slice; otherwise return `this`."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END, plus a trailing window."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            # `ELSE interval END` may have parsed END as the interval unit; recover by
            # treating the default as a plain column reference.
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF(cond, true[, false]) or the keyword form IF cond THEN ... END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # Statement-level IF (e.g. procedural SQL) is kept as a raw command.
                return self._parse_as_command(self._prev)

            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(part FROM expr); a comma is tolerated in place of FROM."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE(expr [HAVING MAX|MIN col])."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the interior of CAST(expr AS type [FORMAT fmt]).

        `strict` selects Cast vs TryCast; `safe` is forwarded onto the node. A temporal
        target with FORMAT is rewritten to StrToDate/StrToTime using the dialect's
        time-format mappings.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Snowflake-style CAST(expr, 'type string').
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unknown type name -> user-defined type.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT arguments, including the Postgres
        WITHIN GROUP (ORDER BY ...) form, into a GroupConcat node."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type) into a cast."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Pair up (search, result) arguments; a trailing unpaired value is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # DECODE matches NULL against NULL, unlike plain equality.
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: match on equality OR on both sides being NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse one [KEY] key <sep> [VALUE] value pair for JSON_OBJECT-style calls."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in FormatJson when followed by FORMAT JSON."""
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)
    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        """Parse JSON_OBJECT / JSON_OBJECTAGG arguments: key-value pairs (or `*`),
        NULL/ABSENT handling, uniqueness, RETURNING type and ENCODING."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        """Parse one column definition inside a JSON_TABLE COLUMNS clause."""
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a COLUMNS (...) schema for JSON_TABLE."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse JSON_TABLE(doc [, path] [ERROR/NULL ON ERROR|EMPTY] COLUMNS (...))."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL MATCH (cols) AGAINST ('expr' [search modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse T-SQL OPENJSON(expr [, path]) [WITH (col type [path] [AS JSON], ...)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One column in the WITH schema clause.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION(needle IN haystack) or the comma-separated variant.

        `haystack_first` flips the comma-argument order for dialects that pass the
        string before the substring.
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse ML.PREDICT(MODEL t, TABLE t2 [, params])."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse the table list of a join hint function (e.g. BROADCAST(t1, t2))."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            # LEADING / TRAILING / BOTH
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM string) puts the trim characters first; swap so that
            # `this` is always the string being trimmed.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a WINDOW clause: a comma-separated list of named windows."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `name AS (window spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` when followed by IGNORE NULLS / RESPECT NULLS."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse trailing window syntax after `this`: FILTER, WITHIN GROUP,
        IGNORE/RESPECT NULLS and OVER (...); `alias=True` parses a named WINDOW entry."""
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER window_name (no parenthesized spec).
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY and ORDER BY parts of a window spec."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound: UNBOUNDED / CURRENT ROW / expr, plus PRECEDING/FOLLOWING."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse `[AS] alias` or `(a, b, ...)` aliases after `this`.

        With `explicit=True`, only an alias introduced by AS is accepted.
        """
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.comments
                column.comments = None

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        # NOTE(review): this method is truncated at the end of this chunk; the
        # remainder of its body is not visible here.
        identifier = self._parse_identifier()

        if identifier:
            return
        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set((TokenType.STRING, TokenType.RAW_STRING)):
            return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        # Used by dialects that accept string literals as identifiers/aliases
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        """Consume and return the current token unless it is reserved (or always, if ignore_reserved)."""
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, optionally braced: {name} or {name:part}."""
        def _parse_parameter_part() -> t.Optional[exp.Expression]:
            return (
                self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True)
            )

        self._match(TokenType.L_BRACE)
        this = _parse_parameter_part()
        expression = self._match(TokenType.COLON) and _parse_parameter_part()
        self._match(TokenType.R_BRACE)

        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # Sub-parser declined: back up over the token we consumed
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list with `parse_method`, dropping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a run of binary operators from `expressions` over `parse_method` operands."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method`, requiring surrounding parentheses unless `optional`."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        # Transaction modes are comma-separated runs of bare keywords,
        # e.g. "ISOLATION LEVEL READ COMMITTED, READ ONLY"
        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            # AND [NO] CHAIN
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)
        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            # Default the drop kind to COLUMN when _parse_drop didn't set one
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse the constraint body of ALTER TABLE ... ADD {CONSTRAINT|PRIMARY KEY|FOREIGN KEY}."""
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED") or False

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())

        # [SET DATA] TYPE <type> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse ALTER TABLE; falls back to an opaque Command for unsupported forms."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only produce an AlterTable if all tokens were consumed
            if not self._curr and actions:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse the WHEN [NOT] MATCHED [BY TARGET|SOURCE] ... THEN clauses of a MERGE."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a SET item of the form <name> [= | TO] <value>."""
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        # Leftover tokens mean this isn't a SET we understand: rewind and
        # fall back to an opaque Command
        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        """Return a Var for the first multi-word option that matches the upcoming tokens."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the rest of the statement verbatim into an exp.Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a (MIN <v> MAX <v>) or (MAX <v>) range; MIN defaults to 0."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        """Parse a dollar-quoted heredoc string: $$...$$ or $tag$...$tag$."""
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk `trie` over upcoming tokens and return the matching sub-parser, if any.

        Restores the token position when no full match is found.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        # Match a single token type; attaches preceding comments to `expression`
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        # Match a sequence of token texts (case-insensitive); all-or-nothing
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]: ...

    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column nodes as Dot chains (table.column)."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Replace Column references to lambda parameters with bare identifiers/dots."""
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VAR_MAP expression from an alternating key/value argument list.

    A single star argument (e.g. ``VAR_MAP(*)``) produces a StarMap instead.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys, values = [], []
    index = 0
    while index < len(args):
        # Arguments alternate: key at even positions, value at odd positions.
        keys.append(args[index])
        values.append(args[index + 1])
        index += 2

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
def parse_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a LOG expression, honoring the dialect's argument order and defaults."""
    # Default argument order is base, expression
    base = seq_get(args, 0)
    value = seq_get(args, 1)

    if value:
        # Dialects that list the expression before the base need the operands swapped.
        if dialect.LOG_BASE_FIRST:
            return exp.Log(this=base, expression=value)
        return exp.Log(this=value, expression=base)

    # Single-argument LOG: natural log or plain LOG, depending on the dialect.
    if dialect.parser_class.LOG_DEFAULTS_TO_LN:
        return exp.Ln(this=base)
    return exp.Log(this=base)
74class Parser(metaclass=_Parser): 75 """ 76 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 77 78 Args: 79 error_level: The desired error level. 80 Default: ErrorLevel.IMMEDIATE 81 error_message_context: Determines the amount of context to capture from a 82 query string when displaying the error message (in number of characters). 83 Default: 100 84 max_errors: Maximum number of error messages to include in a raised ParseError. 85 This is only relevant if error_level is ErrorLevel.RAISE. 86 Default: 3 87 """ 88 89 FUNCTIONS: t.Dict[str, t.Callable] = { 90 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 91 "CONCAT": lambda args, dialect: exp.Concat( 92 expressions=args, 93 safe=not dialect.STRICT_STRING_CONCAT, 94 coalesce=dialect.CONCAT_COALESCE, 95 ), 96 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 97 expressions=args, 98 safe=not dialect.STRICT_STRING_CONCAT, 99 coalesce=dialect.CONCAT_COALESCE, 100 ), 101 "DATE_TO_DATE_STR": lambda args: exp.Cast( 102 this=seq_get(args, 0), 103 to=exp.DataType(this=exp.DataType.Type.TEXT), 104 ), 105 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 106 "LIKE": parse_like, 107 "LOG": parse_logarithm, 108 "TIME_TO_TIME_STR": lambda args: exp.Cast( 109 this=seq_get(args, 0), 110 to=exp.DataType(this=exp.DataType.Type.TEXT), 111 ), 112 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 113 this=exp.Cast( 114 this=seq_get(args, 0), 115 to=exp.DataType(this=exp.DataType.Type.TEXT), 116 ), 117 start=exp.Literal.number(1), 118 length=exp.Literal.number(10), 119 ), 120 "VAR_MAP": parse_var_map, 121 } 122 123 NO_PAREN_FUNCTIONS = { 124 TokenType.CURRENT_DATE: exp.CurrentDate, 125 TokenType.CURRENT_DATETIME: exp.CurrentDate, 126 TokenType.CURRENT_TIME: exp.CurrentTime, 127 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 128 TokenType.CURRENT_USER: exp.CurrentUser, 129 } 130 131 STRUCT_TYPE_TOKENS = { 132 TokenType.NESTED, 
        TokenType.STRUCT,
    }

    # Types that can parameterize/contain other types; struct-like tokens are
    # folded in via *STRUCT_TYPE_TOKENS.
    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    # All data type tokens, including the enum/nested/aggregate groups above
    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    # Maps each signed numeric type token to its unsigned counterpart
    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    # Note that SOME is mapped to the same expression as ANY
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    # Objects that live inside a database/schema; a subset of CREATABLES below
    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.MODEL,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.FOREIGN_KEY,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    # Identifier tokens minus those that would be ambiguous after a table
    # reference (join sides/kinds, OFFSET, WINDOW, ...)
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # Binary operator tables, grouped by precedence tier: token -> expression class
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.COLON_EQ: exp.PropertyEQ,
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    # Empty by default; dialects can register exponentiation operators here
    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }
    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    # Empty by default; dialects can register join hint keywords here
    JOIN_HINTS: t.Set[str] = set()

    # Lambda-syntax handlers: token -> parser for the lambda body, given the
    # already-parsed parameter expressions
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Postfix operators that can follow a column expression; DOT is handled
    # specially (no parser callable)
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Maps a target Expression type to the method that parses it; consumed by
    # parse_into
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Top-level statement dispatch: leading token -> statement parser
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal/primary term dispatch; each handler receives the matched token
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    # Predicates/operators that bind at "range" precedence (BETWEEN, IN, IS, ...)
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    # DDL property keyword -> parser; keys are uppercase keyword text
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column constraint keyword -> parser; keys are uppercase keyword text
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    # ALTER TABLE action keyword -> parser
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"}

    # Functions invoked without parentheses (e.g. CASE ... END)
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Functions with special argument syntax that need bespoke parsing
    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    # Query-modifier dispatch; each handler returns a (modifier_key, expression)
    # pair. Note FETCH shares the "limit" slot with LIMIT.
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Empty by default; dialects can register SHOW subcommand parsers here
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}
    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    # Whether a bare CAST failure is an error (exp.Cast) vs a TryCast
    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    # Whether single-argument LOG defaults to the natural logarithm
    LOG_DEFAULTS_TO_LN = False

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether or not the SET command needs a delimiter
    # (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether or not string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # parses no parenthesis if statements as commands
    NO_PAREN_IF_COMMANDS = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        # Imported locally to avoid a circular import with sqlglot.dialects
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        """Clears all parsing state so the instance can be reused for a new SQL string."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.

        Raises:
            TypeError: If no parser is registered for a requested expression type.
            ParseError: If the token list could not be parsed into any of the types.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Resets state, splits the token stream into per-statement chunks on
        # semicolons, and applies parse_method to each chunk.
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon must not open an empty chunk
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            # _advance() from -1 positions the cursor on the first token
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The offending span is underlined with ANSI escape codes
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f" {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
1171 kwargs: The arguments to set for the expression along with their respective values. 1172 1173 Returns: 1174 The target expression. 1175 """ 1176 instance = exp_class(**kwargs) 1177 instance.add_comments(comments) if comments else self._add_comments(instance) 1178 return self.validate_expression(instance) 1179 1180 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1181 if expression and self._prev_comments: 1182 expression.add_comments(self._prev_comments) 1183 self._prev_comments = None 1184 1185 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1186 """ 1187 Validates an Expression, making sure that all its mandatory arguments are set. 1188 1189 Args: 1190 expression: The expression to validate. 1191 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1192 1193 Returns: 1194 The validated expression. 1195 """ 1196 if self.error_level != ErrorLevel.IGNORE: 1197 for error_message in expression.error_messages(args): 1198 self.raise_error(error_message) 1199 1200 return expression 1201 1202 def _find_sql(self, start: Token, end: Token) -> str: 1203 return self.sql[start.start : end.end + 1] 1204 1205 def _is_connected(self) -> bool: 1206 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1207 1208 def _advance(self, times: int = 1) -> None: 1209 self._index += times 1210 self._curr = seq_get(self._tokens, self._index) 1211 self._next = seq_get(self._tokens, self._index + 1) 1212 1213 if self._index > 0: 1214 self._prev = self._tokens[self._index - 1] 1215 self._prev_comments = self._prev.comments 1216 else: 1217 self._prev = None 1218 self._prev_comments = None 1219 1220 def _retreat(self, index: int) -> None: 1221 if index != self._index: 1222 self._advance(index - self._index) 1223 1224 def _warn_unsupported(self) -> None: 1225 if len(self._tokens) <= 1: 1226 return 1227 1228 # We use _find_sql because self.sql may comprise multiple chunks, 
and we're only 1229 # interested in emitting a warning for the one being currently processed. 1230 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1231 1232 logger.warning( 1233 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1234 ) 1235 1236 def _parse_command(self) -> exp.Command: 1237 self._warn_unsupported() 1238 return self.expression( 1239 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1240 ) 1241 1242 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1243 start = self._prev 1244 exists = self._parse_exists() if allow_exists else None 1245 1246 self._match(TokenType.ON) 1247 1248 kind = self._match_set(self.CREATABLES) and self._prev 1249 if not kind: 1250 return self._parse_as_command(start) 1251 1252 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1253 this = self._parse_user_defined_function(kind=kind.token_type) 1254 elif kind.token_type == TokenType.TABLE: 1255 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1256 elif kind.token_type == TokenType.COLUMN: 1257 this = self._parse_column() 1258 else: 1259 this = self._parse_id_var() 1260 1261 self._match(TokenType.IS) 1262 1263 return self.expression( 1264 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1265 ) 1266 1267 def _parse_to_table( 1268 self, 1269 ) -> exp.ToTableProperty: 1270 table = self._parse_table_parts(schema=True) 1271 return self.expression(exp.ToTableProperty, this=table) 1272 1273 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1274 def _parse_ttl(self) -> exp.Expression: 1275 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1276 this = self._parse_bitwise() 1277 1278 if self._match_text_seq("DELETE"): 1279 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1280 if self._match_text_seq("RECOMPRESS"): 1281 return 
self.expression( 1282 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1283 ) 1284 if self._match_text_seq("TO", "DISK"): 1285 return self.expression( 1286 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1287 ) 1288 if self._match_text_seq("TO", "VOLUME"): 1289 return self.expression( 1290 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1291 ) 1292 1293 return this 1294 1295 expressions = self._parse_csv(_parse_ttl_action) 1296 where = self._parse_where() 1297 group = self._parse_group() 1298 1299 aggregates = None 1300 if group and self._match(TokenType.SET): 1301 aggregates = self._parse_csv(self._parse_set_item) 1302 1303 return self.expression( 1304 exp.MergeTreeTTL, 1305 expressions=expressions, 1306 where=where, 1307 group=group, 1308 aggregates=aggregates, 1309 ) 1310 1311 def _parse_statement(self) -> t.Optional[exp.Expression]: 1312 if self._curr is None: 1313 return None 1314 1315 if self._match_set(self.STATEMENT_PARSERS): 1316 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1317 1318 if self._match_set(Tokenizer.COMMANDS): 1319 return self._parse_command() 1320 1321 expression = self._parse_expression() 1322 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1323 return self._parse_query_modifiers(expression) 1324 1325 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1326 start = self._prev 1327 temporary = self._match(TokenType.TEMPORARY) 1328 materialized = self._match_text_seq("MATERIALIZED") 1329 1330 kind = self._match_set(self.CREATABLES) and self._prev.text 1331 if not kind: 1332 return self._parse_as_command(start) 1333 1334 return self.expression( 1335 exp.Drop, 1336 comments=start.comments, 1337 exists=exists or self._parse_exists(), 1338 this=self._parse_table(schema=True), 1339 kind=kind, 1340 temporary=temporary, 1341 materialized=materialized, 1342 cascade=self._match_text_seq("CASCADE"), 1343 
constraints=self._match_text_seq("CONSTRAINTS"), 1344 purge=self._match_text_seq("PURGE"), 1345 ) 1346 1347 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1348 return ( 1349 self._match_text_seq("IF") 1350 and (not not_ or self._match(TokenType.NOT)) 1351 and self._match(TokenType.EXISTS) 1352 ) 1353 1354 def _parse_create(self) -> exp.Create | exp.Command: 1355 # Note: this can't be None because we've matched a statement parser 1356 start = self._prev 1357 comments = self._prev_comments 1358 1359 replace = ( 1360 start.token_type == TokenType.REPLACE 1361 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1362 or self._match_pair(TokenType.OR, TokenType.ALTER) 1363 ) 1364 unique = self._match(TokenType.UNIQUE) 1365 1366 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1367 self._advance() 1368 1369 properties = None 1370 create_token = self._match_set(self.CREATABLES) and self._prev 1371 1372 if not create_token: 1373 # exp.Properties.Location.POST_CREATE 1374 properties = self._parse_properties() 1375 create_token = self._match_set(self.CREATABLES) and self._prev 1376 1377 if not properties or not create_token: 1378 return self._parse_as_command(start) 1379 1380 exists = self._parse_exists(not_=True) 1381 this = None 1382 expression: t.Optional[exp.Expression] = None 1383 indexes = None 1384 no_schema_binding = None 1385 begin = None 1386 end = None 1387 clone = None 1388 1389 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1390 nonlocal properties 1391 if properties and temp_props: 1392 properties.expressions.extend(temp_props.expressions) 1393 elif temp_props: 1394 properties = temp_props 1395 1396 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1397 this = self._parse_user_defined_function(kind=create_token.token_type) 1398 1399 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1400 extend_props(self._parse_properties()) 1401 1402 expression = 
self._match(TokenType.ALIAS) and self._parse_heredoc() 1403 1404 if not expression: 1405 if self._match(TokenType.COMMAND): 1406 expression = self._parse_as_command(self._prev) 1407 else: 1408 begin = self._match(TokenType.BEGIN) 1409 return_ = self._match_text_seq("RETURN") 1410 1411 if self._match(TokenType.STRING, advance=False): 1412 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1413 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1414 expression = self._parse_string() 1415 extend_props(self._parse_properties()) 1416 else: 1417 expression = self._parse_statement() 1418 1419 end = self._match_text_seq("END") 1420 1421 if return_: 1422 expression = self.expression(exp.Return, this=expression) 1423 elif create_token.token_type == TokenType.INDEX: 1424 this = self._parse_index(index=self._parse_id_var()) 1425 elif create_token.token_type in self.DB_CREATABLES: 1426 table_parts = self._parse_table_parts(schema=True) 1427 1428 # exp.Properties.Location.POST_NAME 1429 self._match(TokenType.COMMA) 1430 extend_props(self._parse_properties(before=True)) 1431 1432 this = self._parse_schema(this=table_parts) 1433 1434 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1435 extend_props(self._parse_properties()) 1436 1437 self._match(TokenType.ALIAS) 1438 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1439 # exp.Properties.Location.POST_ALIAS 1440 extend_props(self._parse_properties()) 1441 1442 expression = self._parse_ddl_select() 1443 1444 if create_token.token_type == TokenType.TABLE: 1445 # exp.Properties.Location.POST_EXPRESSION 1446 extend_props(self._parse_properties()) 1447 1448 indexes = [] 1449 while True: 1450 index = self._parse_index() 1451 1452 # exp.Properties.Location.POST_INDEX 1453 extend_props(self._parse_properties()) 1454 1455 if not index: 1456 break 1457 else: 1458 self._match(TokenType.COMMA) 1459 indexes.append(index) 1460 
elif create_token.token_type == TokenType.VIEW: 1461 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1462 no_schema_binding = True 1463 1464 shallow = self._match_text_seq("SHALLOW") 1465 1466 if self._match_texts(self.CLONE_KEYWORDS): 1467 copy = self._prev.text.lower() == "copy" 1468 clone = self.expression( 1469 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1470 ) 1471 1472 if self._curr: 1473 return self._parse_as_command(start) 1474 1475 return self.expression( 1476 exp.Create, 1477 comments=comments, 1478 this=this, 1479 kind=create_token.text.upper(), 1480 replace=replace, 1481 unique=unique, 1482 expression=expression, 1483 exists=exists, 1484 properties=properties, 1485 indexes=indexes, 1486 no_schema_binding=no_schema_binding, 1487 begin=begin, 1488 end=end, 1489 clone=clone, 1490 ) 1491 1492 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1493 # only used for teradata currently 1494 self._match(TokenType.COMMA) 1495 1496 kwargs = { 1497 "no": self._match_text_seq("NO"), 1498 "dual": self._match_text_seq("DUAL"), 1499 "before": self._match_text_seq("BEFORE"), 1500 "default": self._match_text_seq("DEFAULT"), 1501 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1502 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1503 "after": self._match_text_seq("AFTER"), 1504 "minimum": self._match_texts(("MIN", "MINIMUM")), 1505 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1506 } 1507 1508 if self._match_texts(self.PROPERTY_PARSERS): 1509 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1510 try: 1511 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1512 except TypeError: 1513 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1514 1515 return None 1516 1517 def _parse_property(self) -> t.Optional[exp.Expression]: 1518 if self._match_texts(self.PROPERTY_PARSERS): 1519 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1520 1521 if 
self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1522 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1523 1524 if self._match_text_seq("COMPOUND", "SORTKEY"): 1525 return self._parse_sortkey(compound=True) 1526 1527 if self._match_text_seq("SQL", "SECURITY"): 1528 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1529 1530 index = self._index 1531 key = self._parse_column() 1532 1533 if not self._match(TokenType.EQ): 1534 self._retreat(index) 1535 return None 1536 1537 return self.expression( 1538 exp.Property, 1539 this=key.to_dot() if isinstance(key, exp.Column) else key, 1540 value=self._parse_column() or self._parse_var(any_token=True), 1541 ) 1542 1543 def _parse_stored(self) -> exp.FileFormatProperty: 1544 self._match(TokenType.ALIAS) 1545 1546 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1547 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1548 1549 return self.expression( 1550 exp.FileFormatProperty, 1551 this=( 1552 self.expression( 1553 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1554 ) 1555 if input_format or output_format 1556 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1557 ), 1558 ) 1559 1560 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1561 self._match(TokenType.EQ) 1562 self._match(TokenType.ALIAS) 1563 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1564 1565 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1566 properties = [] 1567 while True: 1568 if before: 1569 prop = self._parse_property_before() 1570 else: 1571 prop = self._parse_property() 1572 1573 if not prop: 1574 break 1575 for p in ensure_list(prop): 1576 properties.append(p) 1577 1578 if properties: 1579 return self.expression(exp.Properties, 
expressions=properties) 1580 1581 return None 1582 1583 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1584 return self.expression( 1585 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1586 ) 1587 1588 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1589 if self._index >= 2: 1590 pre_volatile_token = self._tokens[self._index - 2] 1591 else: 1592 pre_volatile_token = None 1593 1594 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1595 return exp.VolatileProperty() 1596 1597 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1598 1599 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1600 self._match_pair(TokenType.EQ, TokenType.ON) 1601 1602 prop = self.expression(exp.WithSystemVersioningProperty) 1603 if self._match(TokenType.L_PAREN): 1604 self._match_text_seq("HISTORY_TABLE", "=") 1605 prop.set("this", self._parse_table_parts()) 1606 1607 if self._match(TokenType.COMMA): 1608 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1609 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1610 1611 self._match_r_paren() 1612 1613 return prop 1614 1615 def _parse_with_property( 1616 self, 1617 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1618 if self._match(TokenType.L_PAREN, advance=False): 1619 return self._parse_wrapped_csv(self._parse_property) 1620 1621 if self._match_text_seq("JOURNAL"): 1622 return self._parse_withjournaltable() 1623 1624 if self._match_text_seq("DATA"): 1625 return self._parse_withdata(no=False) 1626 elif self._match_text_seq("NO", "DATA"): 1627 return self._parse_withdata(no=True) 1628 1629 if not self._next: 1630 return None 1631 1632 return self._parse_withisolatedloading() 1633 1634 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1635 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1636 
self._match(TokenType.EQ) 1637 1638 user = self._parse_id_var() 1639 self._match(TokenType.PARAMETER) 1640 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1641 1642 if not user or not host: 1643 return None 1644 1645 return exp.DefinerProperty(this=f"{user}@{host}") 1646 1647 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1648 self._match(TokenType.TABLE) 1649 self._match(TokenType.EQ) 1650 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1651 1652 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1653 return self.expression(exp.LogProperty, no=no) 1654 1655 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1656 return self.expression(exp.JournalProperty, **kwargs) 1657 1658 def _parse_checksum(self) -> exp.ChecksumProperty: 1659 self._match(TokenType.EQ) 1660 1661 on = None 1662 if self._match(TokenType.ON): 1663 on = True 1664 elif self._match_text_seq("OFF"): 1665 on = False 1666 1667 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1668 1669 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 1670 return self.expression( 1671 exp.Cluster, 1672 expressions=( 1673 self._parse_wrapped_csv(self._parse_ordered) 1674 if wrapped 1675 else self._parse_csv(self._parse_ordered) 1676 ), 1677 ) 1678 1679 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1680 self._match_text_seq("BY") 1681 1682 self._match_l_paren() 1683 expressions = self._parse_csv(self._parse_column) 1684 self._match_r_paren() 1685 1686 if self._match_text_seq("SORTED", "BY"): 1687 self._match_l_paren() 1688 sorted_by = self._parse_csv(self._parse_ordered) 1689 self._match_r_paren() 1690 else: 1691 sorted_by = None 1692 1693 self._match(TokenType.INTO) 1694 buckets = self._parse_number() 1695 self._match_text_seq("BUCKETS") 1696 1697 return self.expression( 1698 exp.ClusteredByProperty, 1699 expressions=expressions, 1700 
sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        # Parses the "GRANTS" tail of a COPY GRANTS property; the preceding
        # token was consumed by the property dispatcher before this is called.
        if not self._match_text_seq("GRANTS"):
            # Not COPY GRANTS after all -- hand the consumed token back.
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        # FREESPACE [=] <number> [PERCENT]
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        # Either MERGEBLOCKRATIO = <number> [PERCENT], or the bare
        # [NO | DEFAULT] MERGEBLOCKRATIO form (those flags arrive via kwargs).
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        # [DEFAULT | MINIMUM | MAXIMUM] DATABLOCKSIZE [= <number>] [BYTES | KBYTES | KILOBYTES]
        # The default/minimum/maximum flags are matched by the caller and passed in.
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        # BLOCKCOMPRESSION = {ALWAYS | MANUAL | NEVER | DEFAULT} [AUTOTEMP (<schema>)]
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        # WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR {ALL | INSERT | NONE}]
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        # LOCKING {DATABASE | TABLE | VIEW | ROW} [<name>] {FOR | IN} <lock type> [OVERRIDE]
        # Every piece is optional here; missing parts are stored as None.
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locks don't name an object; the other kinds are followed by one.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return
self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        # Parses a partition bound after FOR VALUES (Postgres declarative partitioning):
        #   IN (<expr>, ...)                     -- list partitioning
        #   FROM (<expr>, ...) TO (<expr>, ...)  -- range partitioning
        #   WITH (MODULUS <n>, REMAINDER <n>)    -- hash partitioning
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE / MAXVALUE mark an unbounded end of a range bound.
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            # Hash form: `this` holds the modulus, `expression` the remainder.
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        # PARTITION OF <parent table> {DEFAULT | FOR VALUES <bound spec>}
        if not self._match_text_seq("OF"):
            # Not a PARTITION OF clause -- hand back the token the dispatcher consumed.
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this,
expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        # PARTITIONED BY [=] {(<schema>) | <field>}
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        # WITH [NO] DATA [AND [NO] STATISTICS] -- `no` reflects what the caller matched.
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        # CONTAINS SQL ("CONTAINS" was consumed by the property dispatcher).
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        # MODIFIES SQL DATA
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        # NO PRIMARY INDEX | NO SQL
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        # ON COMMIT {PRESERVE | DELETE} ROWS; any other ON <...> becomes a
        # generic OnProperty wrapping the parsed schema/identifier.
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        # READS SQL DATA
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        # LIKE <table> [{INCLUDING | EXCLUDING} <option>]...
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                # INCLUDING/EXCLUDING without an option name -- bail out entirely.
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        # [COMPOUND] SORTKEY (<id>, ...) -- `compound` is matched by the caller.
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        # [DEFAULT] CHARACTER SET [=] <value>
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        # REMOTE WITH CONNECTION <connection name>
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        # RETURNS {<type> | TABLE <col type, ...> | TABLE (<schema>)}
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # Angle-bracketed struct-style column list: RETURNS TABLE <a INT, b TEXT>
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and
self._prev.text 1999 extended = self._match_text_seq("EXTENDED") 2000 this = self._parse_table(schema=True) 2001 properties = self._parse_properties() 2002 expressions = properties.expressions if properties else None 2003 return self.expression( 2004 exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions 2005 ) 2006 2007 def _parse_insert(self) -> exp.Insert: 2008 comments = ensure_list(self._prev_comments) 2009 overwrite = self._match(TokenType.OVERWRITE) 2010 ignore = self._match(TokenType.IGNORE) 2011 local = self._match_text_seq("LOCAL") 2012 alternative = None 2013 2014 if self._match_text_seq("DIRECTORY"): 2015 this: t.Optional[exp.Expression] = self.expression( 2016 exp.Directory, 2017 this=self._parse_var_or_string(), 2018 local=local, 2019 row_format=self._parse_row_format(match_row=True), 2020 ) 2021 else: 2022 if self._match(TokenType.OR): 2023 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2024 2025 self._match(TokenType.INTO) 2026 comments += ensure_list(self._prev_comments) 2027 self._match(TokenType.TABLE) 2028 this = self._parse_table(schema=True) 2029 2030 returning = self._parse_returning() 2031 2032 return self.expression( 2033 exp.Insert, 2034 comments=comments, 2035 this=this, 2036 by_name=self._match_text_seq("BY", "NAME"), 2037 exists=self._parse_exists(), 2038 partition=self._parse_partition(), 2039 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2040 and self._parse_conjunction(), 2041 expression=self._parse_ddl_select(), 2042 conflict=self._parse_on_conflict(), 2043 returning=returning or self._parse_returning(), 2044 overwrite=overwrite, 2045 alternative=alternative, 2046 ignore=ignore, 2047 ) 2048 2049 def _parse_kill(self) -> exp.Kill: 2050 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2051 2052 return self.expression( 2053 exp.Kill, 2054 this=self._parse_primary(), 2055 kind=kind, 2056 ) 2057 2058 def _parse_on_conflict(self) -> 
t.Optional[exp.OnConflict]: 2059 conflict = self._match_text_seq("ON", "CONFLICT") 2060 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2061 2062 if not conflict and not duplicate: 2063 return None 2064 2065 nothing = None 2066 expressions = None 2067 key = None 2068 constraint = None 2069 2070 if conflict: 2071 if self._match_text_seq("ON", "CONSTRAINT"): 2072 constraint = self._parse_id_var() 2073 else: 2074 key = self._parse_csv(self._parse_value) 2075 2076 self._match_text_seq("DO") 2077 if self._match_text_seq("NOTHING"): 2078 nothing = True 2079 else: 2080 self._match(TokenType.UPDATE) 2081 self._match(TokenType.SET) 2082 expressions = self._parse_csv(self._parse_equality) 2083 2084 return self.expression( 2085 exp.OnConflict, 2086 duplicate=duplicate, 2087 expressions=expressions, 2088 nothing=nothing, 2089 key=key, 2090 constraint=constraint, 2091 ) 2092 2093 def _parse_returning(self) -> t.Optional[exp.Returning]: 2094 if not self._match(TokenType.RETURNING): 2095 return None 2096 return self.expression( 2097 exp.Returning, 2098 expressions=self._parse_csv(self._parse_expression), 2099 into=self._match(TokenType.INTO) and self._parse_table_part(), 2100 ) 2101 2102 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2103 if not self._match(TokenType.FORMAT): 2104 return None 2105 return self._parse_row_format() 2106 2107 def _parse_row_format( 2108 self, match_row: bool = False 2109 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2110 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2111 return None 2112 2113 if self._match_text_seq("SERDE"): 2114 this = self._parse_string() 2115 2116 serde_properties = None 2117 if self._match(TokenType.SERDE_PROPERTIES): 2118 serde_properties = self.expression( 2119 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 2120 ) 2121 2122 return self.expression( 2123 
exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2124 ) 2125 2126 self._match_text_seq("DELIMITED") 2127 2128 kwargs = {} 2129 2130 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2131 kwargs["fields"] = self._parse_string() 2132 if self._match_text_seq("ESCAPED", "BY"): 2133 kwargs["escaped"] = self._parse_string() 2134 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2135 kwargs["collection_items"] = self._parse_string() 2136 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2137 kwargs["map_keys"] = self._parse_string() 2138 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2139 kwargs["lines"] = self._parse_string() 2140 if self._match_text_seq("NULL", "DEFINED", "AS"): 2141 kwargs["null"] = self._parse_string() 2142 2143 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2144 2145 def _parse_load(self) -> exp.LoadData | exp.Command: 2146 if self._match_text_seq("DATA"): 2147 local = self._match_text_seq("LOCAL") 2148 self._match_text_seq("INPATH") 2149 inpath = self._parse_string() 2150 overwrite = self._match(TokenType.OVERWRITE) 2151 self._match_pair(TokenType.INTO, TokenType.TABLE) 2152 2153 return self.expression( 2154 exp.LoadData, 2155 this=self._parse_table(schema=True), 2156 local=local, 2157 overwrite=overwrite, 2158 inpath=inpath, 2159 partition=self._parse_partition(), 2160 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2161 serde=self._match_text_seq("SERDE") and self._parse_string(), 2162 ) 2163 return self._parse_as_command(self._prev) 2164 2165 def _parse_delete(self) -> exp.Delete: 2166 # This handles MySQL's "Multiple-Table Syntax" 2167 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2168 tables = None 2169 comments = self._prev_comments 2170 if not self._match(TokenType.FROM, advance=False): 2171 tables = self._parse_csv(self._parse_table) or None 2172 2173 returning = self._parse_returning() 2174 2175 return 
self.expression( 2176 exp.Delete, 2177 comments=comments, 2178 tables=tables, 2179 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2180 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2181 where=self._parse_where(), 2182 returning=returning or self._parse_returning(), 2183 limit=self._parse_limit(), 2184 ) 2185 2186 def _parse_update(self) -> exp.Update: 2187 comments = self._prev_comments 2188 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2189 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2190 returning = self._parse_returning() 2191 return self.expression( 2192 exp.Update, 2193 comments=comments, 2194 **{ # type: ignore 2195 "this": this, 2196 "expressions": expressions, 2197 "from": self._parse_from(joins=True), 2198 "where": self._parse_where(), 2199 "returning": returning or self._parse_returning(), 2200 "order": self._parse_order(), 2201 "limit": self._parse_limit(), 2202 }, 2203 ) 2204 2205 def _parse_uncache(self) -> exp.Uncache: 2206 if not self._match(TokenType.TABLE): 2207 self.raise_error("Expecting TABLE after UNCACHE") 2208 2209 return self.expression( 2210 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2211 ) 2212 2213 def _parse_cache(self) -> exp.Cache: 2214 lazy = self._match_text_seq("LAZY") 2215 self._match(TokenType.TABLE) 2216 table = self._parse_table(schema=True) 2217 2218 options = [] 2219 if self._match_text_seq("OPTIONS"): 2220 self._match_l_paren() 2221 k = self._parse_string() 2222 self._match(TokenType.EQ) 2223 v = self._parse_string() 2224 options = [k, v] 2225 self._match_r_paren() 2226 2227 self._match(TokenType.ALIAS) 2228 return self.expression( 2229 exp.Cache, 2230 this=table, 2231 lazy=lazy, 2232 options=options, 2233 expression=self._parse_select(nested=True), 2234 ) 2235 2236 def _parse_partition(self) -> t.Optional[exp.Partition]: 2237 if not self._match(TokenType.PARTITION): 2238 return 
None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse one VALUES row — either a parenthesized tuple or a bare expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_expression()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list (overridable hook for dialects)."""
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: WITH-prefixed statements, SELECT cores,
        parenthesized subqueries/pivots, VALUES, and DuckDB's leading FROM.

        Args:
            nested: whether this call sits inside a parenthesized subquery.
            table: allow a bare table reference inside parentheses.
            parse_subquery_alias: parse an alias on an early-returned subquery.
            parse_set_operation: fold trailing UNION/INTERSECT/EXCEPT in here.
        """
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                # Reached only when raise_error doesn't abort (lenient error level)
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            # e.g. BigQuery's SELECT AS STRUCT / SELECT AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after 
SELECT")

            # TOP N (e.g. T-SQL) is parsed here; LIMIT is a query modifier
            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            # Bare leading FROM (duckdb): implicit SELECT *
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH [RECURSIVE] clause and its comma-separated CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # Stop when neither a comma nor another WITH separates CTEs;
            # a comma may itself be followed by a redundant WITH keyword
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse a single CTE: `alias [(cols)] AS (statement)`."""
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse an optional table alias, optionally with a column list.

        Returns None when neither an alias name nor a column list is present.
        """
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Backtrack if the parens didn't actually contain column names
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in an exp.Subquery with optional pivots and alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals and clause modifiers (WHERE, GROUP BY, ...)
        to a modifiable node; non-modifiable input is returned untouched.
        """
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            # Keep applying registered clause parsers until none matches
            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # `LIMIT x, y` style: hoist the offset into its own node
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint block `/*+ ... */` into exp.Hint."""
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse SELECT ... INTO [TEMPORARY|UNLOGGED] [TABLE] target."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause; `skip_from_token` assumes FROM was consumed."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause into exp.MatchRecognize."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows
= exp.var("ONE ROW PER MATCH") 2500 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2501 text = "ALL ROWS PER MATCH" 2502 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2503 text += f" SHOW EMPTY MATCHES" 2504 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2505 text += f" OMIT EMPTY MATCHES" 2506 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2507 text += f" WITH UNMATCHED ROWS" 2508 rows = exp.var(text) 2509 else: 2510 rows = None 2511 2512 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2513 text = "AFTER MATCH SKIP" 2514 if self._match_text_seq("PAST", "LAST", "ROW"): 2515 text += f" PAST LAST ROW" 2516 elif self._match_text_seq("TO", "NEXT", "ROW"): 2517 text += f" TO NEXT ROW" 2518 elif self._match_text_seq("TO", "FIRST"): 2519 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2520 elif self._match_text_seq("TO", "LAST"): 2521 text += f" TO LAST {self._advance_any().text}" # type: ignore 2522 after = exp.var(text) 2523 else: 2524 after = None 2525 2526 if self._match_text_seq("PATTERN"): 2527 self._match_l_paren() 2528 2529 if not self._curr: 2530 self.raise_error("Expecting )", self._curr) 2531 2532 paren = 1 2533 start = self._curr 2534 2535 while self._curr and paren > 0: 2536 if self._curr.token_type == TokenType.L_PAREN: 2537 paren += 1 2538 if self._curr.token_type == TokenType.R_PAREN: 2539 paren -= 1 2540 2541 end = self._prev 2542 self._advance() 2543 2544 if paren > 0: 2545 self.raise_error("Expecting )", self._curr) 2546 2547 pattern = exp.var(self._find_sql(start, end)) 2548 else: 2549 pattern = None 2550 2551 define = ( 2552 self._parse_csv(self._parse_name_as_expression) 2553 if self._match_text_seq("DEFINE") 2554 else None 2555 ) 2556 2557 self._match_r_paren() 2558 2559 return self.expression( 2560 exp.MatchRecognize, 2561 partition_by=partition, 2562 order=order, 2563 measures=measures, 2564 rows=rows, 2565 after=after, 2566 pattern=pattern, 2567 define=define, 2568 
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL, CROSS APPLY or OUTER APPLY into exp.Lateral.

        `cross_apply` is True for CROSS APPLY, False for OUTER APPLY, and
        None for plain LATERAL.
        """
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: fall back to an UNNEST, a function call, or a
            # (possibly dotted) identifier chain
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Consume and return the (method, side, kind) tokens of a JOIN."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    )
-> t.Optional[exp.Join]:
        """Parse one JOIN clause (including comma joins and APPLY forms)."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # Modifier tokens without a JOIN keyword: undo their consumption
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # No ON/USING yet: the condition may follow a nested join, e.g.
            # `a JOIN b JOIN c ON ...`; try it and backtrack if it isn't there
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        # Preserve comments attached to the consumed modifier tokens
        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an expression optionally followed by an operator class
        (e.g. Postgres index opclasses)."""
        this = self._parse_conjunction()
        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not
self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition into exp.Index.

        When `index` is given, the name was already parsed and only the
        `ON table (...)` tail is consumed; otherwise the full
        `[UNIQUE|PRIMARY|AMP] INDEX name` head is parsed first.
        """
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            include=include,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH(...) or MySQL USE/FORCE/IGNORE INDEX table hints."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while
self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name (function calls are
        disallowed in schema position)."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a possibly qualified table name `[catalog.][db.]table[...]`."""
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema) or ""

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any table-like factor: lateral, UNNEST, VALUES, a subquery,
        or a (possibly bracketed/aliased/sampled/pivoted) table reference.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not
subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this = t.cast(
            exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema))
        )

        if schema:
            # In schema position (e.g. DDL) the name may carry a column list
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Some dialects place TABLESAMPLE before the alias, others after
        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample becomes the parent node wrapping the table
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse temporal-table clauses: FOR TIMESTAMP/VERSION AS OF,
        FROM ... TO, BETWEEN ... AND, CONTAINED IN (...), or ALL."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end =
self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse UNNEST(...) with optional alias and WITH ORDINALITY/OFFSET."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                # Dialects like BigQuery alias the produced column, not the table
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                # The extra column alias names the ordinality/offset column
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        """Parse a VALUES clause, optionally wrapped as `(VALUES ...) alias`."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()
2940 if is_derived: 2941 self._match_r_paren() 2942 2943 return self.expression( 2944 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2945 ) 2946 2947 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2948 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2949 as_modifier and self._match_text_seq("USING", "SAMPLE") 2950 ): 2951 return None 2952 2953 bucket_numerator = None 2954 bucket_denominator = None 2955 bucket_field = None 2956 percent = None 2957 size = None 2958 seed = None 2959 2960 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 2961 matched_l_paren = self._match(TokenType.L_PAREN) 2962 2963 if self.TABLESAMPLE_CSV: 2964 num = None 2965 expressions = self._parse_csv(self._parse_primary) 2966 else: 2967 expressions = None 2968 num = ( 2969 self._parse_factor() 2970 if self._match(TokenType.NUMBER, advance=False) 2971 else self._parse_primary() or self._parse_placeholder() 2972 ) 2973 2974 if self._match_text_seq("BUCKET"): 2975 bucket_numerator = self._parse_number() 2976 self._match_text_seq("OUT", "OF") 2977 bucket_denominator = bucket_denominator = self._parse_number() 2978 self._match(TokenType.ON) 2979 bucket_field = self._parse_field() 2980 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2981 percent = num 2982 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 2983 size = num 2984 else: 2985 percent = num 2986 2987 if matched_l_paren: 2988 self._match_r_paren() 2989 2990 if self._match(TokenType.L_PAREN): 2991 method = self._parse_var(upper=True) 2992 seed = self._match(TokenType.COMMA) and self._parse_number() 2993 self._match_r_paren() 2994 elif self._match_texts(("SEED", "REPEATABLE")): 2995 seed = self._parse_wrapped(self._parse_number) 2996 2997 return self.expression( 2998 exp.TableSample, 2999 expressions=expressions, 3000 method=method, 3001 bucket_numerator=bucket_numerator, 3002 
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        """Collect consecutive PIVOT/UNPIVOT clauses; None if there are none."""
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        """Collect consecutive JOIN clauses; None if there are none."""
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        """Parse DuckDB's simplified PIVOT statement form."""
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In:
        """Parse the `FOR col IN (v1 [AS a1], ...)` part of a PIVOT."""
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_conjunction()

            self._match(TokenType.ALIAS)
            alias = self._parse_field()
            if alias:
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        aliased_expressions = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=aliased_expressions)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a PIVOT or UNPIVOT clause, or return None (with backtracking)."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            #
https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            # Bare PIVOT/UNPIVOT keyword without a body: undo and bail out
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        # Only the last pivot in a chain may carry the table alias
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names from the IN values crossed
            # with the aggregation aliases
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each pivot aggregation (dialect hook)."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause, or return None if absent."""
        if not skip_where_token and not
self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY, including ALL, GROUPING SETS, ROLLUP, CUBE and
        WITH TOTALS, accumulating each element kind into exp.Group args."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # `WITH ROLLUP` stores True; `ROLLUP (...)` stores the columns
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    # The WITH belonged to something else; give it back
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse GROUPING SETS (...), or return None if absent."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: a parenthesized tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return
self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause, or return None if absent."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause, or return None if absent."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse Oracle-style START WITH ... CONNECT BY (in either order)."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        # PRIOR is only a valid prefix operator inside CONNECT BY, so the
        # parser entry is registered temporarily and removed right after
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_name_as_expression(self) -> exp.Alias:
        """Parse `name AS expr` into an exp.Alias (used by MEASURES/DEFINE)."""
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse ClickHouse-style INTERPOLATE (...), or return None."""
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY (or ORDER SIBLINGS BY); returns `this` unchanged
        when neither keyword is present."""
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a sort-like clause (e.g. SORT BY / CLUSTER BY) keyed by `token`."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered:
        """Parse one ORDER BY term with ASC/DESC, NULLS FIRST/LAST and
        ClickHouse's WITH FILL, applying the dialect's null-ordering default."""
        this = parse_method() if parse_method else self._parse_conjunction()

        asc = self._match(TokenType.ASC)
        # `(asc and False)` makes desc explicitly False (not None) when ASC
        # was written, so the arg is recorded as an explicit non-DESC
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # No explicit NULLS ordering: derive it from the dialect defaults
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when `top=True`), plus the FETCH FIRST form."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                # TOP accepts either a bare number or a parenthesized expression
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if
limit_paren else self._parse_number() 3298 3299 if limit_paren: 3300 self._match_r_paren() 3301 else: 3302 expression = self._parse_term() 3303 3304 if self._match(TokenType.COMMA): 3305 offset = expression 3306 expression = self._parse_term() 3307 else: 3308 offset = None 3309 3310 limit_exp = self.expression( 3311 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3312 ) 3313 3314 return limit_exp 3315 3316 if self._match(TokenType.FETCH): 3317 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3318 direction = self._prev.text.upper() if direction else "FIRST" 3319 3320 count = self._parse_field(tokens=self.FETCH_TOKENS) 3321 percent = self._match(TokenType.PERCENT) 3322 3323 self._match_set((TokenType.ROW, TokenType.ROWS)) 3324 3325 only = self._match_text_seq("ONLY") 3326 with_ties = self._match_text_seq("WITH", "TIES") 3327 3328 if only and with_ties: 3329 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3330 3331 return self.expression( 3332 exp.Fetch, 3333 direction=direction, 3334 count=count, 3335 percent=percent, 3336 with_ties=with_ties, 3337 ) 3338 3339 return this 3340 3341 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3342 if not self._match(TokenType.OFFSET): 3343 return this 3344 3345 count = self._parse_term() 3346 self._match_set((TokenType.ROW, TokenType.ROWS)) 3347 return self.expression(exp.Offset, this=this, expression=count) 3348 3349 def _parse_locks(self) -> t.List[exp.Lock]: 3350 locks = [] 3351 while True: 3352 if self._match_text_seq("FOR", "UPDATE"): 3353 update = True 3354 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3355 "LOCK", "IN", "SHARE", "MODE" 3356 ): 3357 update = False 3358 else: 3359 break 3360 3361 expressions = None 3362 if self._match_text_seq("OF"): 3363 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3364 3365 wait: t.Optional[bool | exp.Expression] = None 3366 if 
self._match_text_seq("NOWAIT"): 3367 wait = True 3368 elif self._match_text_seq("WAIT"): 3369 wait = self._parse_primary() 3370 elif self._match_text_seq("SKIP", "LOCKED"): 3371 wait = False 3372 3373 locks.append( 3374 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3375 ) 3376 3377 return locks 3378 3379 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3380 while this and self._match_set(self.SET_OPERATIONS): 3381 token_type = self._prev.token_type 3382 3383 if token_type == TokenType.UNION: 3384 operation = exp.Union 3385 elif token_type == TokenType.EXCEPT: 3386 operation = exp.Except 3387 else: 3388 operation = exp.Intersect 3389 3390 comments = self._prev.comments 3391 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3392 by_name = self._match_text_seq("BY", "NAME") 3393 expression = self._parse_select(nested=True, parse_set_operation=False) 3394 3395 this = self.expression( 3396 operation, 3397 comments=comments, 3398 this=this, 3399 distinct=distinct, 3400 by_name=by_name, 3401 expression=expression, 3402 ) 3403 3404 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3405 expression = this.expression 3406 3407 if expression: 3408 for arg in self.UNION_MODIFIERS: 3409 expr = expression.args.get(arg) 3410 if expr: 3411 this.set(arg, expr.pop()) 3412 3413 return this 3414 3415 def _parse_expression(self) -> t.Optional[exp.Expression]: 3416 return self._parse_alias(self._parse_conjunction()) 3417 3418 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3419 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3420 3421 def _parse_equality(self) -> t.Optional[exp.Expression]: 3422 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3423 3424 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3425 return self._parse_tokens(self._parse_range, self.COMPARISON) 3426 3427 def _parse_range(self, this: 
t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3428 this = this or self._parse_bitwise() 3429 negate = self._match(TokenType.NOT) 3430 3431 if self._match_set(self.RANGE_PARSERS): 3432 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3433 if not expression: 3434 return this 3435 3436 this = expression 3437 elif self._match(TokenType.ISNULL): 3438 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3439 3440 # Postgres supports ISNULL and NOTNULL for conditions. 3441 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3442 if self._match(TokenType.NOTNULL): 3443 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3444 this = self.expression(exp.Not, this=this) 3445 3446 if negate: 3447 this = self.expression(exp.Not, this=this) 3448 3449 if self._match(TokenType.IS): 3450 this = self._parse_is(this) 3451 3452 return this 3453 3454 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3455 index = self._index - 1 3456 negate = self._match(TokenType.NOT) 3457 3458 if self._match_text_seq("DISTINCT", "FROM"): 3459 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3460 return self.expression(klass, this=this, expression=self._parse_conjunction()) 3461 3462 expression = self._parse_null() or self._parse_boolean() 3463 if not expression: 3464 self._retreat(index) 3465 return None 3466 3467 this = self.expression(exp.Is, this=this, expression=expression) 3468 return self.expression(exp.Not, this=this) if negate else this 3469 3470 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3471 unnest = self._parse_unnest(with_alias=False) 3472 if unnest: 3473 this = self.expression(exp.In, this=this, unnest=unnest) 3474 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3475 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3476 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3477 
            # a single subquery is stored as `query`; anything else as `expressions`
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        """Parse the tail of `<this> BETWEEN low AND high`."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an ESCAPE '<char>' clause if present (used after LIKE)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal, normalizing it to INTERVAL '<value>' <UNIT>."""
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        # Bail out on bare "INTERVAL" followed by IS (e.g. a column named interval)
        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                # e.g. INTERVAL '5 day': value and unit both inside the string
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1].upper())

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators plus ||, ??, and << / >> shift pairs."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators (+, -, ...)."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators, annotating Div semantics."""
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            this = self.expression(
                self.FACTOR[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )
            # Record dialect division semantics so generators can transpile / faithfully
            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary prefix operators, falling through to typed expressions."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        """Parse intervals, cast-style `<type> <literal>` forms, or a plain column."""
        interval = parse_interval and self._parse_interval()
        if interval:
            # Convert INTERVAL 'val_1' unit_1 ... 'val_n' unit_n into a sum of intervals
            while self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
                interval = self.expression(  # type: ignore
                    exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
                )

            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            # `<type> <literal>` is treated as a cast (or a dialect-specific literal)
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name with no args was likely an identifier — reparse
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one type parameter, e.g. the `10` in DECIMAL(10, 2)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested/parameterized) data type into exp.DataType."""
        index = self._index

        # Teradata system UDT prefix
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            # Not a known type token: maybe an identifier naming a type or a UDT
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                # e.g. ClickHouse AggregateFunction(func, type, ...)
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Parenthesized args didn't parse as a type — this wasn't a type after all
                self._retreat(index)
                return None

            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        # Angle-bracket syntax for nested types, e.g. ARRAY<INT>, STRUCT<a INT>
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            # WITH/WITHOUT TIME ZONE variants select a more specific type
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            # INTERVAL <unit> TO <unit> spans, e.g. INTERVAL DAY TO SECOND
            if self._match_text_seq("TO"):
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))

        if maybe_func and check_func:
            # `TYPE(...)` followed by a string is a function call (e.g. DATE('...')),
            # not a type — peek ahead without consuming and bail out if so
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] pairs wrap the type in ARRAY, e.g. INT[][]
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one STRUCT member, e.g. `name: INT` or `name INT`."""
        index = self._index
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        column_def = self._parse_column_def(this)

        # If a type was required but we only got a bare name, reparse as a type
        if type_required and (
            (isinstance(this, exp.Column) and this.this is column_def) or this is column_def
        ):
            self._retreat(index)
            return self._parse_types()

        return column_def

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AT TIME ZONE <zone> if present."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, then any trailing ops (dots, casts, brackets)."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply chained column operators: `.`, `::` casts, JSON arrows, brackets."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift column parts up one slot: a.b.c -> catalog/db/table/column
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literal, parenthesized expr/subquery, or tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate, e.g. 'a' 'b' -> CONCAT
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        # Numbers like ".5" tokenize as DOT NUMBER
        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a primary, function call, or identifier — in that order."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, including the ODBC {fn ...} escape wrapper."""
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        # Functions callable without parentheses (e.g. CURRENT_DATE)
        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            # EXISTS/ANY/ALL followed by a subquery
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                # Builder functions may optionally accept the active dialect
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                # Remember the original spelling when function names aren't normalized
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this

                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        # A function call may be followed by an OVER (...) window spec
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter in a function/procedure definition."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. MySQL _utf8'...'), else a plain identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified by a kind."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (`(x, y) -> ...` / `x -> ...`) or fall back to an expression."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # No lambda arrow: rewind and parse as a regular (aggregate) argument
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        # Aggregate args can carry IGNORE/RESPECT NULLS, ORDER BY, and LIMIT
        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized column/constraint list into exp.Schema."""
        index = self._index

        # Speculatively try a nested SELECT first; any errors from this probe
        # are discarded and the token position is restored.
        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: name, type, and any column constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            # Computed column: <name> AS <expr> [PERSISTED] [NOT NULL]
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, with optional (start, increment) or START/INCREMENT."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        """Parse AUTO REFRESH <value>; rewinds if REFRESH doesn't follow AUTO."""
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS constraint, with either a wrapped list or one expression."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY (...) | ROW ... | (expr)}."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        # Db2-style GENERATED ... AS ROW {START | END} [HIDDEN]
        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ALWAYS AS (<expr>): a computed expression, not identity
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare "(start, increment)" shorthand
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse INLINE [LENGTH] <n>."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        """Parse the tail of a NOT ... constraint (NULL / CASESPECIFIC / FOR REPLICATION)."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named) column constraint."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a named CONSTRAINT, or fall back to an unnamed schema constraint."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a constraint keyword (from `constraints`) and dispatch to its parser."""
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] [(cols)] [USING <index type>]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect key-constraint option strings (ON DELETE/UPDATE actions etc.)."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # the word after ON is the event name (e.g. DELETE, UPDATE)
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse REFERENCES <table> plus key-constraint options."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        # NOTE(review): expressions is always None here — presumably a slot
        # populated by dialect subclasses; confirm before relying on it.
        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON DELETE/UPDATE <action>]."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        # keys are "delete"/"update", forwarded as kwargs to exp.ForeignKey
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4371 action = "SET " + self._prev.text.upper() 4372 else: 4373 self._advance() 4374 action = self._prev.text.upper() 4375 4376 options[kind] = action 4377 4378 return self.expression( 4379 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 4380 ) 4381 4382 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4383 return self._parse_field() 4384 4385 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4386 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4387 self._retreat(self._index - 1) 4388 return None 4389 4390 id_vars = self._parse_wrapped_id_vars() 4391 return self.expression( 4392 exp.PeriodForSystemTimeConstraint, 4393 this=seq_get(id_vars, 0), 4394 expression=seq_get(id_vars, 1), 4395 ) 4396 4397 def _parse_primary_key( 4398 self, wrapped_optional: bool = False, in_props: bool = False 4399 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4400 desc = ( 4401 self._match_set((TokenType.ASC, TokenType.DESC)) 4402 and self._prev.token_type == TokenType.DESC 4403 ) 4404 4405 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4406 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4407 4408 expressions = self._parse_wrapped_csv( 4409 self._parse_primary_key_part, optional=wrapped_optional 4410 ) 4411 options = self._parse_key_constraint_options() 4412 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4413 4414 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4415 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4416 4417 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4418 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4419 return this 4420 4421 bracket_kind = self._prev.token_type 4422 expressions = self._parse_csv( 4423 
lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4424 ) 4425 4426 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4427 self.raise_error("Expected ]") 4428 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4429 self.raise_error("Expected }") 4430 4431 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4432 if bracket_kind == TokenType.L_BRACE: 4433 this = self.expression(exp.Struct, expressions=expressions) 4434 elif not this or this.name.upper() == "ARRAY": 4435 this = self.expression(exp.Array, expressions=expressions) 4436 else: 4437 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4438 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4439 4440 self._add_comments(this) 4441 return self._parse_bracket(this) 4442 4443 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4444 if self._match(TokenType.COLON): 4445 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4446 return this 4447 4448 def _parse_case(self) -> t.Optional[exp.Expression]: 4449 ifs = [] 4450 default = None 4451 4452 comments = self._prev_comments 4453 expression = self._parse_conjunction() 4454 4455 while self._match(TokenType.WHEN): 4456 this = self._parse_conjunction() 4457 self._match(TokenType.THEN) 4458 then = self._parse_conjunction() 4459 ifs.append(self.expression(exp.If, this=this, true=then)) 4460 4461 if self._match(TokenType.ELSE): 4462 default = self._parse_conjunction() 4463 4464 if not self._match(TokenType.END): 4465 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4466 default = exp.column("interval") 4467 else: 4468 self.raise_error("Expected END after CASE", self._prev) 4469 4470 return self._parse_window( 4471 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4472 ) 4473 4474 def 
_parse_if(self) -> t.Optional[exp.Expression]: 4475 if self._match(TokenType.L_PAREN): 4476 args = self._parse_csv(self._parse_conjunction) 4477 this = self.validate_expression(exp.If.from_arg_list(args), args) 4478 self._match_r_paren() 4479 else: 4480 index = self._index - 1 4481 4482 if self.NO_PAREN_IF_COMMANDS and index == 0: 4483 return self._parse_as_command(self._prev) 4484 4485 condition = self._parse_conjunction() 4486 4487 if not condition: 4488 self._retreat(index) 4489 return None 4490 4491 self._match(TokenType.THEN) 4492 true = self._parse_conjunction() 4493 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4494 self._match(TokenType.END) 4495 this = self.expression(exp.If, this=condition, true=true, false=false) 4496 4497 return self._parse_window(this) 4498 4499 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4500 if not self._match_text_seq("VALUE", "FOR"): 4501 self._retreat(self._index - 1) 4502 return None 4503 4504 return self.expression( 4505 exp.NextValueFor, 4506 this=self._parse_column(), 4507 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4508 ) 4509 4510 def _parse_extract(self) -> exp.Extract: 4511 this = self._parse_function() or self._parse_var() or self._parse_type() 4512 4513 if self._match(TokenType.FROM): 4514 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4515 4516 if not self._match(TokenType.COMMA): 4517 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4518 4519 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4520 4521 def _parse_any_value(self) -> exp.AnyValue: 4522 this = self._parse_lambda() 4523 is_max = None 4524 having = None 4525 4526 if self._match(TokenType.HAVING): 4527 self._match_texts(("MAX", "MIN")) 4528 is_max = self._prev.text == "MAX" 4529 having = self._parse_column() 4530 4531 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 
    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the interior of a CAST-like call: ``<expr> AS <type> [FORMAT <fmt>]``.

        Args:
            strict: when True build an `exp.Cast`, otherwise an `exp.TryCast`.
            safe: forwarded verbatim to the resulting node's ``safe`` arg.

        Raises:
            ParseError: if neither ``AS`` nor the comma form follows the
                expression, or if no target type can be parsed.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            # CAST(expr, 'type string') comma form → CastToStrType
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                # No explicit type before FORMAT: fall back to UNKNOWN
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                # A temporal target with FORMAT is normalized into a string-to-date/time
                # conversion, translating the format via the dialect's time mappings.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                # Carry an AT TIME ZONE wrapper on the format over to the StrToTime node
                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Bare identifier as the target type → treat as a user-defined type
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse the arguments of a STRING_AGG-style call into an `exp.GroupConcat`.

        Handles the optional DISTINCT prefix, trailing ORDER BY / LIMIT modifiers,
        and the ``WITHIN GROUP (ORDER BY ...)`` ordered-set form.
        """
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type) into a cast node.

        Args:
            strict: when True build an `exp.Cast`, otherwise an `exp.TryCast`.
            safe: forwarded verbatim to the resulting node's ``safe`` arg.
        """
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            # CONVERT(expr USING charset): the "type" is a character set
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            # Neither form matched — emit a cast with no target type
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        # Fewer than 3 args → the DECODE(bin, charset) variant
        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk the (search, result) pairs; a trailing unpaired arg is the default
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # NULL search value must be matched with IS NULL, not equality
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: equal, OR both sides NULL (Oracle DECODE semantics)
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse one ``[KEY] <key> <sep> [VALUE] <value>`` entry of a JSON object."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in `exp.FormatJson` if a trailing ``FORMAT JSON`` is present."""
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)
4687 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4688 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 4689 for value in values: 4690 if self._match_text_seq(value, "ON", on): 4691 return f"{value} ON {on}" 4692 4693 return None 4694 4695 @t.overload 4696 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 4697 4698 @t.overload 4699 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 4700 4701 def _parse_json_object(self, agg=False): 4702 star = self._parse_star() 4703 expressions = ( 4704 [star] 4705 if star 4706 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4707 ) 4708 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4709 4710 unique_keys = None 4711 if self._match_text_seq("WITH", "UNIQUE"): 4712 unique_keys = True 4713 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4714 unique_keys = False 4715 4716 self._match_text_seq("KEYS") 4717 4718 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4719 self._parse_type() 4720 ) 4721 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4722 4723 return self.expression( 4724 exp.JSONObjectAgg if agg else exp.JSONObject, 4725 expressions=expressions, 4726 null_handling=null_handling, 4727 unique_keys=unique_keys, 4728 return_type=return_type, 4729 encoding=encoding, 4730 ) 4731 4732 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4733 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4734 if not self._match_text_seq("NESTED"): 4735 this = self._parse_id_var() 4736 kind = self._parse_types(allow_identifiers=False) 4737 nested = None 4738 else: 4739 this = None 4740 kind = None 4741 nested = True 4742 4743 path = self._match_text_seq("PATH") and self._parse_string() 4744 nested_schema = nested and self._parse_json_schema() 4745 4746 return self.expression( 4747 exp.JSONColumnDef, 4748 this=this, 4749 kind=kind, 
4750 path=path, 4751 nested_schema=nested_schema, 4752 ) 4753 4754 def _parse_json_schema(self) -> exp.JSONSchema: 4755 self._match_text_seq("COLUMNS") 4756 return self.expression( 4757 exp.JSONSchema, 4758 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 4759 ) 4760 4761 def _parse_json_table(self) -> exp.JSONTable: 4762 this = self._parse_format_json(self._parse_bitwise()) 4763 path = self._match(TokenType.COMMA) and self._parse_string() 4764 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 4765 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 4766 schema = self._parse_json_schema() 4767 4768 return exp.JSONTable( 4769 this=this, 4770 schema=schema, 4771 path=path, 4772 error_handling=error_handling, 4773 empty_handling=empty_handling, 4774 ) 4775 4776 def _parse_match_against(self) -> exp.MatchAgainst: 4777 expressions = self._parse_csv(self._parse_column) 4778 4779 self._match_text_seq(")", "AGAINST", "(") 4780 4781 this = self._parse_string() 4782 4783 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4784 modifier = "IN NATURAL LANGUAGE MODE" 4785 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4786 modifier = f"{modifier} WITH QUERY EXPANSION" 4787 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4788 modifier = "IN BOOLEAN MODE" 4789 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4790 modifier = "WITH QUERY EXPANSION" 4791 else: 4792 modifier = None 4793 4794 return self.expression( 4795 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4796 ) 4797 4798 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4799 def _parse_open_json(self) -> exp.OpenJSON: 4800 this = self._parse_bitwise() 4801 path = self._match(TokenType.COMMA) and self._parse_string() 4802 4803 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4804 this = self._parse_field(any_token=True) 4805 kind = 
self._parse_types() 4806 path = self._parse_string() 4807 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4808 4809 return self.expression( 4810 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4811 ) 4812 4813 expressions = None 4814 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4815 self._match_l_paren() 4816 expressions = self._parse_csv(_parse_open_json_column_def) 4817 4818 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4819 4820 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4821 args = self._parse_csv(self._parse_bitwise) 4822 4823 if self._match(TokenType.IN): 4824 return self.expression( 4825 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4826 ) 4827 4828 if haystack_first: 4829 haystack = seq_get(args, 0) 4830 needle = seq_get(args, 1) 4831 else: 4832 needle = seq_get(args, 0) 4833 haystack = seq_get(args, 1) 4834 4835 return self.expression( 4836 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4837 ) 4838 4839 def _parse_predict(self) -> exp.Predict: 4840 self._match_text_seq("MODEL") 4841 this = self._parse_table() 4842 4843 self._match(TokenType.COMMA) 4844 self._match_text_seq("TABLE") 4845 4846 return self.expression( 4847 exp.Predict, 4848 this=this, 4849 expression=self._parse_table(), 4850 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 4851 ) 4852 4853 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4854 args = self._parse_csv(self._parse_table) 4855 return exp.JoinHint(this=func_name.upper(), expressions=args) 4856 4857 def _parse_substring(self) -> exp.Substring: 4858 # Postgres supports the form: substring(string [from int] [for int]) 4859 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4860 4861 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4862 4863 if self._match(TokenType.FROM): 4864 
args.append(self._parse_bitwise()) 4865 if self._match(TokenType.FOR): 4866 args.append(self._parse_bitwise()) 4867 4868 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4869 4870 def _parse_trim(self) -> exp.Trim: 4871 # https://www.w3resource.com/sql/character-functions/trim.php 4872 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4873 4874 position = None 4875 collation = None 4876 expression = None 4877 4878 if self._match_texts(self.TRIM_TYPES): 4879 position = self._prev.text.upper() 4880 4881 this = self._parse_bitwise() 4882 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4883 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 4884 expression = self._parse_bitwise() 4885 4886 if invert_order: 4887 this, expression = expression, this 4888 4889 if self._match(TokenType.COLLATE): 4890 collation = self._parse_bitwise() 4891 4892 return self.expression( 4893 exp.Trim, this=this, position=position, expression=expression, collation=collation 4894 ) 4895 4896 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4897 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4898 4899 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4900 return self._parse_window(self._parse_id_var(), alias=True) 4901 4902 def _parse_respect_or_ignore_nulls( 4903 self, this: t.Optional[exp.Expression] 4904 ) -> t.Optional[exp.Expression]: 4905 if self._match_text_seq("IGNORE", "NULLS"): 4906 return self.expression(exp.IgnoreNulls, this=this) 4907 if self._match_text_seq("RESPECT", "NULLS"): 4908 return self.expression(exp.RespectNulls, this=this) 4909 return this 4910 4911 def _parse_window( 4912 self, this: t.Optional[exp.Expression], alias: bool = False 4913 ) -> t.Optional[exp.Expression]: 4914 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4915 self._match(TokenType.WHERE) 4916 this = self.expression( 4917 exp.Filter, this=this, 
expression=self._parse_where(skip_where_token=True) 4918 ) 4919 self._match_r_paren() 4920 4921 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4922 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4923 if self._match_text_seq("WITHIN", "GROUP"): 4924 order = self._parse_wrapped(self._parse_order) 4925 this = self.expression(exp.WithinGroup, this=this, expression=order) 4926 4927 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4928 # Some dialects choose to implement and some do not. 4929 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4930 4931 # There is some code above in _parse_lambda that handles 4932 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4933 4934 # The below changes handle 4935 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4936 4937 # Oracle allows both formats 4938 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4939 # and Snowflake chose to do the same for familiarity 4940 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4941 this = self._parse_respect_or_ignore_nulls(this) 4942 4943 # bigquery select from window x AS (partition by ...) 
4944 if alias: 4945 over = None 4946 self._match(TokenType.ALIAS) 4947 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4948 return this 4949 else: 4950 over = self._prev.text.upper() 4951 4952 if not self._match(TokenType.L_PAREN): 4953 return self.expression( 4954 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4955 ) 4956 4957 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4958 4959 first = self._match(TokenType.FIRST) 4960 if self._match_text_seq("LAST"): 4961 first = False 4962 4963 partition, order = self._parse_partition_and_order() 4964 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4965 4966 if kind: 4967 self._match(TokenType.BETWEEN) 4968 start = self._parse_window_spec() 4969 self._match(TokenType.AND) 4970 end = self._parse_window_spec() 4971 4972 spec = self.expression( 4973 exp.WindowSpec, 4974 kind=kind, 4975 start=start["value"], 4976 start_side=start["side"], 4977 end=end["value"], 4978 end_side=end["side"], 4979 ) 4980 else: 4981 spec = None 4982 4983 self._match_r_paren() 4984 4985 window = self.expression( 4986 exp.Window, 4987 this=this, 4988 partition_by=partition, 4989 order=order, 4990 spec=spec, 4991 alias=window_alias, 4992 over=over, 4993 first=first, 4994 ) 4995 4996 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
4997 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4998 return self._parse_window(window, alias=alias) 4999 5000 return window 5001 5002 def _parse_partition_and_order( 5003 self, 5004 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5005 return self._parse_partition_by(), self._parse_order() 5006 5007 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5008 self._match(TokenType.BETWEEN) 5009 5010 return { 5011 "value": ( 5012 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5013 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5014 or self._parse_bitwise() 5015 ), 5016 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5017 } 5018 5019 def _parse_alias( 5020 self, this: t.Optional[exp.Expression], explicit: bool = False 5021 ) -> t.Optional[exp.Expression]: 5022 any_token = self._match(TokenType.ALIAS) 5023 comments = self._prev_comments 5024 5025 if explicit and not any_token: 5026 return this 5027 5028 if self._match(TokenType.L_PAREN): 5029 aliases = self.expression( 5030 exp.Aliases, 5031 comments=comments, 5032 this=this, 5033 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5034 ) 5035 self._match_r_paren(aliases) 5036 return aliases 5037 5038 alias = self._parse_id_var(any_token) or ( 5039 self.STRING_ALIASES and self._parse_string_as_identifier() 5040 ) 5041 5042 if alias: 5043 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5044 column = this.this 5045 5046 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5047 if not this.comments and column and column.comments: 5048 this.comments = column.comments 5049 column.comments = None 5050 5051 return this 5052 5053 def _parse_id_var( 5054 self, 5055 any_token: bool = True, 5056 tokens: t.Optional[t.Collection[TokenType]] = None, 5057 ) -> t.Optional[exp.Expression]: 5058 identifier = self._parse_identifier() 5059 5060 if identifier: 5061 return 
identifier 5062 5063 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 5064 quoted = self._prev.token_type == TokenType.STRING 5065 return exp.Identifier(this=self._prev.text, quoted=quoted) 5066 5067 return None 5068 5069 def _parse_string(self) -> t.Optional[exp.Expression]: 5070 if self._match_set((TokenType.STRING, TokenType.RAW_STRING)): 5071 return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev) 5072 return self._parse_placeholder() 5073 5074 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5075 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5076 5077 def _parse_number(self) -> t.Optional[exp.Expression]: 5078 if self._match(TokenType.NUMBER): 5079 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 5080 return self._parse_placeholder() 5081 5082 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5083 if self._match(TokenType.IDENTIFIER): 5084 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5085 return self._parse_placeholder() 5086 5087 def _parse_var( 5088 self, 5089 any_token: bool = False, 5090 tokens: t.Optional[t.Collection[TokenType]] = None, 5091 upper: bool = False, 5092 ) -> t.Optional[exp.Expression]: 5093 if ( 5094 (any_token and self._advance_any()) 5095 or self._match(TokenType.VAR) 5096 or (self._match_set(tokens) if tokens else False) 5097 ): 5098 return self.expression( 5099 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5100 ) 5101 return self._parse_placeholder() 5102 5103 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5104 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5105 self._advance() 5106 return self._prev 5107 return None 5108 5109 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5110 return self._parse_var() or self._parse_string() 5111 5112 def _parse_null(self) -> 
t.Optional[exp.Expression]:
        # Tail of _parse_null: a NULL-like token is parsed via its registered
        # primary parser; otherwise fall through to placeholder parsing.
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal, falling back to a placeholder."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a `*` token, falling back to a placeholder."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, optionally brace-wrapped with a
        colon-separated second part (e.g. `{name:part}`)."""

        def _parse_parameter_part() -> t.Optional[exp.Expression]:
            return (
                self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True)
            )

        # Braces are optional; both wrapped and bare parameters parse.
        self._match(TokenType.L_BRACE)
        this = _parse_parameter_part()
        expression = self._match(TokenType.COLON) and _parse_parameter_part()
        self._match(TokenType.R_BRACE)

        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder via PLACEHOLDER_PARSERS.

        If the matched parser yields nothing, the consumed token is rewound
        so other grammar rules can try it.
        """
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse `EXCEPT (col, ...)` or `EXCEPT col`; None when absent."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse `REPLACE (expr, ...)` or `REPLACE expr`; None when absent."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list with `parse_method`, dropping Nones."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach comments found around the separator to the previous item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a chain of binary operators mapped in `expressions`
        (token type -> expression class)."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated list of identifiers."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a (possibly optionally) parenthesized CSV list."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parens; error when required and absent."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated list of expressions."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT, or failing that a (possibly aliased) expression
        with trailing set operations."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT body of a DDL statement (e.g. CREATE ... AS)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse a BEGIN/START [kind] TRANSACTION|WORK with optional modes."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            # A single mode may span several VAR tokens (e.g. READ ONLY).
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK with optional savepoint and AND [NO] CHAIN."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        """Parse REFRESH [TABLE] <string-or-table>."""
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        # ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <field definition>
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)
# https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5285 if self._match_texts(("FIRST", "AFTER")): 5286 position = self._prev.text 5287 column_position = self.expression( 5288 exp.ColumnPosition, this=self._parse_column(), position=position 5289 ) 5290 expression.set("position", column_position) 5291 5292 return expression 5293 5294 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5295 drop = self._match(TokenType.DROP) and self._parse_drop() 5296 if drop and not isinstance(drop, exp.Command): 5297 drop.set("kind", drop.args.get("kind", "COLUMN")) 5298 return drop 5299 5300 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5301 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5302 return self.expression( 5303 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5304 ) 5305 5306 def _parse_add_constraint(self) -> exp.AddConstraint: 5307 this = None 5308 kind = self._prev.token_type 5309 5310 if kind == TokenType.CONSTRAINT: 5311 this = self._parse_id_var() 5312 5313 if self._match_text_seq("CHECK"): 5314 expression = self._parse_wrapped(self._parse_conjunction) 5315 enforced = self._match_text_seq("ENFORCED") or False 5316 5317 return self.expression( 5318 exp.AddConstraint, this=this, expression=expression, enforced=enforced 5319 ) 5320 5321 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 5322 expression = self._parse_foreign_key() 5323 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 5324 expression = self._parse_primary_key() 5325 else: 5326 expression = None 5327 5328 return self.expression(exp.AddConstraint, this=this, expression=expression) 5329 5330 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5331 index = self._index - 1 5332 5333 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 5334 return self._parse_csv(self._parse_add_constraint) 5335 
        self._retreat(index)
        # Some dialects allow a single ADD followed by a parenthesized list of
        # columns; others require ADD before each column definition.
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse ALTER TABLE ... ALTER [COLUMN] <col> ... into AlterColumn."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())

        # Remaining form: [SET DATA] TYPE <type> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse the action list of ALTER TABLE ... DROP (partitions/columns)."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        """Parse RENAME COLUMN old TO new, or RENAME TO new_table."""
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse ALTER TABLE; unrecognized forms degrade to a raw Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only produce an AlterTable when every token was consumed.
            if not self._curr and actions:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO target USING source ON condition WHEN ..."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse the WHEN [NOT] MATCHED [BY ...] ... THEN clauses of a MERGE."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is False for BY TARGET, True for BY SOURCE, else falsy.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse SHOW via dialect-specific parsers, else fall back to Command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one SET assignment (`name = value` or `name TO value`)."""
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            # Not an assignment after all: rewind so other rules can try.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristics>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item via dialect parsers, else a plain assignment."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement; fall back to Command on leftover tokens."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        """Match one of `options` (possibly multi-word) and wrap it in a Var."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the rest of the statement as an opaque Command node."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # Split off the leading keyword (the `start` token's text).
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property with optional key/value sub-settings."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                # Stop once neither a key nor a value can be parsed.
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a dictionary range clause, e.g. `(MIN <v> MAX <v>)`.

        When MIN is omitted, it defaults to the literal 0.
        """
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        """Parse `<expr> for <iterator> in <collection> [if <cond>]`."""
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            # Not a comprehension: rewind past the column we just consumed.
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        """Parse a heredoc string such as `$tag$ ... $tag$` or `$$ ... $$`."""
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            # A named tag must be immediately followed by a closing `$`.
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        # Reachable when raise_error merely records (non-IMMEDIATE levels).
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Find the parser whose (possibly multi-word) key matches the
        upcoming tokens, using `trie` for longest-prefix matching."""
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        # No match: restore the cursor.
        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        # Match a single token type; optionally advance and attach pending
        # comments to `expression`.
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        # Match if the current token's type is in the `types` collection.
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        # Match two consecutive token types.
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        # Require an opening parenthesis, raising otherwise.
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        # Require a closing parenthesis, raising otherwise.
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        # Match if the current token's upper-cased text is in `texts`.
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        # Match an exact sequence of upper-cased token texts, rewinding fully
        # on any mismatch (and also when advance=False, i.e. peek-only).
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]: ...

    def _replace_columns_with_dots(self, this):
        # Recursively rewrite Column nodes into Dot chains (table.column).
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        # Rewrite columns that reference lambda variables so they aren't
        # treated as real table columns (e.g. `x` in `x -> x.y`).
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    # Replace at the outermost enclosing Dot.
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
The Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1009 def __init__( 1010 self, 1011 error_level: t.Optional[ErrorLevel] = None, 1012 error_message_context: int = 100, 1013 max_errors: int = 3, 1014 dialect: DialectType = None, 1015 ): 1016 from sqlglot.dialects import Dialect 1017 1018 self.error_level = error_level or ErrorLevel.IMMEDIATE 1019 self.error_message_context = error_message_context 1020 self.max_errors = max_errors 1021 self.dialect = Dialect.get_or_raise(dialect) 1022 self.reset()
1034 def parse( 1035 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1036 ) -> t.List[t.Optional[exp.Expression]]: 1037 """ 1038 Parses a list of tokens and returns a list of syntax trees, one tree 1039 per parsed SQL statement. 1040 1041 Args: 1042 raw_tokens: The list of tokens. 1043 sql: The original SQL string, used to produce helpful debug messages. 1044 1045 Returns: 1046 The list of the produced syntax trees. 1047 """ 1048 return self._parse( 1049 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1050 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1052 def parse_into( 1053 self, 1054 expression_types: exp.IntoType, 1055 raw_tokens: t.List[Token], 1056 sql: t.Optional[str] = None, 1057 ) -> t.List[t.Optional[exp.Expression]]: 1058 """ 1059 Parses a list of tokens into a given Expression type. If a collection of Expression 1060 types is given instead, this method will try to parse the token list into each one 1061 of them, stopping at the first for which the parsing succeeds. 1062 1063 Args: 1064 expression_types: The expression type(s) to try and parse the token list into. 1065 raw_tokens: The list of tokens. 1066 sql: The original SQL string, used to produce helpful debug messages. 1067 1068 Returns: 1069 The target Expression. 1070 """ 1071 errors = [] 1072 for expression_type in ensure_list(expression_types): 1073 parser = self.EXPRESSION_PARSERS.get(expression_type) 1074 if not parser: 1075 raise TypeError(f"No parser registered for {expression_type}") 1076 1077 try: 1078 return self._parse(parser, raw_tokens, sql) 1079 except ParseError as e: 1080 e.errors[0]["into_expression"] = expression_type 1081 errors.append(e) 1082 1083 raise ParseError( 1084 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1085 errors=merge_errors(errors), 1086 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1123 def check_errors(self) -> None: 1124 """Logs or raises any found errors, depending on the chosen error level setting.""" 1125 if self.error_level == ErrorLevel.WARN: 1126 for error in self.errors: 1127 logger.error(str(error)) 1128 elif self.error_level == ErrorLevel.RAISE and self.errors: 1129 raise ParseError( 1130 concat_messages(self.errors, self.max_errors), 1131 errors=merge_errors(self.errors), 1132 )
Logs or raises any found errors, depending on the chosen error level setting.
1134 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1135 """ 1136 Appends an error in the list of recorded errors or raises it, depending on the chosen 1137 error level setting. 1138 """ 1139 token = token or self._curr or self._prev or Token.string("") 1140 start = token.start 1141 end = token.end + 1 1142 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1143 highlight = self.sql[start:end] 1144 end_context = self.sql[end : end + self.error_message_context] 1145 1146 error = ParseError.new( 1147 f"{message}. Line {token.line}, Col: {token.col}.\n" 1148 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1149 description=message, 1150 line=token.line, 1151 col=token.col, 1152 start_context=start_context, 1153 highlight=highlight, 1154 end_context=end_context, 1155 ) 1156 1157 if self.error_level == ErrorLevel.IMMEDIATE: 1158 raise error 1159 1160 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1162 def expression( 1163 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1164 ) -> E: 1165 """ 1166 Creates a new, validated Expression. 1167 1168 Args: 1169 exp_class: The expression class to instantiate. 1170 comments: An optional list of comments to attach to the expression. 1171 kwargs: The arguments to set for the expression along with their respective values. 1172 1173 Returns: 1174 The target expression. 1175 """ 1176 instance = exp_class(**kwargs) 1177 instance.add_comments(comments) if comments else self._add_comments(instance) 1178 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1185 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1186 """ 1187 Validates an Expression, making sure that all its mandatory arguments are set. 1188 1189 Args: 1190 expression: The expression to validate. 1191 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1192 1193 Returns: 1194 The validated expression. 1195 """ 1196 if self.error_level != ErrorLevel.IGNORE: 1197 for error_message in expression.error_messages(args): 1198 self.raise_error(error_message) 1199 1200 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.