# sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E, Lit 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18logger = logging.getLogger("sqlglot") 19 20 21def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 22 if len(args) == 1 and args[0].is_star: 23 return exp.StarMap(this=args[0]) 24 25 keys = [] 26 values = [] 27 for i in range(0, len(args), 2): 28 keys.append(args[i]) 29 values.append(args[i + 1]) 30 31 return exp.VarMap( 32 keys=exp.array(*keys, copy=False), 33 values=exp.array(*values, copy=False), 34 ) 35 36 37def parse_like(args: t.List) -> exp.Escape | exp.Like: 38 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 39 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 40 41 42def binary_range_parser( 43 expr_type: t.Type[exp.Expression], 44) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 45 return lambda self, this: self._parse_escape( 46 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 47 ) 48 49 50def parse_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 51 # Default argument order is base, expression 52 this = seq_get(args, 0) 53 expression = seq_get(args, 1) 54 55 if expression: 56 if not dialect.LOG_BASE_FIRST: 57 this, expression = expression, this 58 return exp.Log(this=this, expression=expression) 59 60 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 61 62 63def parse_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, 
Dialect], E]: 64 def _parser(args: t.List, dialect: Dialect) -> E: 65 expression = expr_type( 66 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 67 ) 68 if len(args) > 2 and expr_type is exp.JSONExtract: 69 expression.set("expressions", args[2:]) 70 71 return expression 72 73 return _parser 74 75 76class _Parser(type): 77 def __new__(cls, clsname, bases, attrs): 78 klass = super().__new__(cls, clsname, bases, attrs) 79 80 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 81 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 82 83 return klass 84 85 86class Parser(metaclass=_Parser): 87 """ 88 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 89 90 Args: 91 error_level: The desired error level. 92 Default: ErrorLevel.IMMEDIATE 93 error_message_context: Determines the amount of context to capture from a 94 query string when displaying the error message (in number of characters). 95 Default: 100 96 max_errors: Maximum number of error messages to include in a raised ParseError. 97 This is only relevant if error_level is ErrorLevel.RAISE. 
98 Default: 3 99 """ 100 101 FUNCTIONS: t.Dict[str, t.Callable] = { 102 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 103 "CONCAT": lambda args, dialect: exp.Concat( 104 expressions=args, 105 safe=not dialect.STRICT_STRING_CONCAT, 106 coalesce=dialect.CONCAT_COALESCE, 107 ), 108 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 109 expressions=args, 110 safe=not dialect.STRICT_STRING_CONCAT, 111 coalesce=dialect.CONCAT_COALESCE, 112 ), 113 "DATE_TO_DATE_STR": lambda args: exp.Cast( 114 this=seq_get(args, 0), 115 to=exp.DataType(this=exp.DataType.Type.TEXT), 116 ), 117 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 118 "JSON_EXTRACT": parse_extract_json_with_path(exp.JSONExtract), 119 "JSON_EXTRACT_SCALAR": parse_extract_json_with_path(exp.JSONExtractScalar), 120 "JSON_EXTRACT_PATH_TEXT": parse_extract_json_with_path(exp.JSONExtractScalar), 121 "LIKE": parse_like, 122 "LOG": parse_logarithm, 123 "TIME_TO_TIME_STR": lambda args: exp.Cast( 124 this=seq_get(args, 0), 125 to=exp.DataType(this=exp.DataType.Type.TEXT), 126 ), 127 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 128 this=exp.Cast( 129 this=seq_get(args, 0), 130 to=exp.DataType(this=exp.DataType.Type.TEXT), 131 ), 132 start=exp.Literal.number(1), 133 length=exp.Literal.number(10), 134 ), 135 "VAR_MAP": parse_var_map, 136 } 137 138 NO_PAREN_FUNCTIONS = { 139 TokenType.CURRENT_DATE: exp.CurrentDate, 140 TokenType.CURRENT_DATETIME: exp.CurrentDate, 141 TokenType.CURRENT_TIME: exp.CurrentTime, 142 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 143 TokenType.CURRENT_USER: exp.CurrentUser, 144 } 145 146 STRUCT_TYPE_TOKENS = { 147 TokenType.NESTED, 148 TokenType.STRUCT, 149 } 150 151 NESTED_TYPE_TOKENS = { 152 TokenType.ARRAY, 153 TokenType.LOWCARDINALITY, 154 TokenType.MAP, 155 TokenType.NULLABLE, 156 *STRUCT_TYPE_TOKENS, 157 } 158 159 ENUM_TYPE_TOKENS = { 160 TokenType.ENUM, 161 TokenType.ENUM8, 162 TokenType.ENUM16, 163 } 164 165 
AGGREGATE_TYPE_TOKENS = { 166 TokenType.AGGREGATEFUNCTION, 167 TokenType.SIMPLEAGGREGATEFUNCTION, 168 } 169 170 TYPE_TOKENS = { 171 TokenType.BIT, 172 TokenType.BOOLEAN, 173 TokenType.TINYINT, 174 TokenType.UTINYINT, 175 TokenType.SMALLINT, 176 TokenType.USMALLINT, 177 TokenType.INT, 178 TokenType.UINT, 179 TokenType.BIGINT, 180 TokenType.UBIGINT, 181 TokenType.INT128, 182 TokenType.UINT128, 183 TokenType.INT256, 184 TokenType.UINT256, 185 TokenType.MEDIUMINT, 186 TokenType.UMEDIUMINT, 187 TokenType.FIXEDSTRING, 188 TokenType.FLOAT, 189 TokenType.DOUBLE, 190 TokenType.CHAR, 191 TokenType.NCHAR, 192 TokenType.VARCHAR, 193 TokenType.NVARCHAR, 194 TokenType.BPCHAR, 195 TokenType.TEXT, 196 TokenType.MEDIUMTEXT, 197 TokenType.LONGTEXT, 198 TokenType.MEDIUMBLOB, 199 TokenType.LONGBLOB, 200 TokenType.BINARY, 201 TokenType.VARBINARY, 202 TokenType.JSON, 203 TokenType.JSONB, 204 TokenType.INTERVAL, 205 TokenType.TINYBLOB, 206 TokenType.TINYTEXT, 207 TokenType.TIME, 208 TokenType.TIMETZ, 209 TokenType.TIMESTAMP, 210 TokenType.TIMESTAMP_S, 211 TokenType.TIMESTAMP_MS, 212 TokenType.TIMESTAMP_NS, 213 TokenType.TIMESTAMPTZ, 214 TokenType.TIMESTAMPLTZ, 215 TokenType.DATETIME, 216 TokenType.DATETIME64, 217 TokenType.DATE, 218 TokenType.DATE32, 219 TokenType.INT4RANGE, 220 TokenType.INT4MULTIRANGE, 221 TokenType.INT8RANGE, 222 TokenType.INT8MULTIRANGE, 223 TokenType.NUMRANGE, 224 TokenType.NUMMULTIRANGE, 225 TokenType.TSRANGE, 226 TokenType.TSMULTIRANGE, 227 TokenType.TSTZRANGE, 228 TokenType.TSTZMULTIRANGE, 229 TokenType.DATERANGE, 230 TokenType.DATEMULTIRANGE, 231 TokenType.DECIMAL, 232 TokenType.UDECIMAL, 233 TokenType.BIGDECIMAL, 234 TokenType.UUID, 235 TokenType.GEOGRAPHY, 236 TokenType.GEOMETRY, 237 TokenType.HLLSKETCH, 238 TokenType.HSTORE, 239 TokenType.PSEUDO_TYPE, 240 TokenType.SUPER, 241 TokenType.SERIAL, 242 TokenType.SMALLSERIAL, 243 TokenType.BIGSERIAL, 244 TokenType.XML, 245 TokenType.YEAR, 246 TokenType.UNIQUEIDENTIFIER, 247 TokenType.USERDEFINED, 248 
TokenType.MONEY, 249 TokenType.SMALLMONEY, 250 TokenType.ROWVERSION, 251 TokenType.IMAGE, 252 TokenType.VARIANT, 253 TokenType.OBJECT, 254 TokenType.OBJECT_IDENTIFIER, 255 TokenType.INET, 256 TokenType.IPADDRESS, 257 TokenType.IPPREFIX, 258 TokenType.IPV4, 259 TokenType.IPV6, 260 TokenType.UNKNOWN, 261 TokenType.NULL, 262 *ENUM_TYPE_TOKENS, 263 *NESTED_TYPE_TOKENS, 264 *AGGREGATE_TYPE_TOKENS, 265 } 266 267 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 268 TokenType.BIGINT: TokenType.UBIGINT, 269 TokenType.INT: TokenType.UINT, 270 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 271 TokenType.SMALLINT: TokenType.USMALLINT, 272 TokenType.TINYINT: TokenType.UTINYINT, 273 TokenType.DECIMAL: TokenType.UDECIMAL, 274 } 275 276 SUBQUERY_PREDICATES = { 277 TokenType.ANY: exp.Any, 278 TokenType.ALL: exp.All, 279 TokenType.EXISTS: exp.Exists, 280 TokenType.SOME: exp.Any, 281 } 282 283 RESERVED_TOKENS = { 284 *Tokenizer.SINGLE_TOKENS.values(), 285 TokenType.SELECT, 286 } 287 288 DB_CREATABLES = { 289 TokenType.DATABASE, 290 TokenType.SCHEMA, 291 TokenType.TABLE, 292 TokenType.VIEW, 293 TokenType.MODEL, 294 TokenType.DICTIONARY, 295 } 296 297 CREATABLES = { 298 TokenType.COLUMN, 299 TokenType.CONSTRAINT, 300 TokenType.FUNCTION, 301 TokenType.INDEX, 302 TokenType.PROCEDURE, 303 TokenType.FOREIGN_KEY, 304 *DB_CREATABLES, 305 } 306 307 # Tokens that can represent identifiers 308 ID_VAR_TOKENS = { 309 TokenType.VAR, 310 TokenType.ANTI, 311 TokenType.APPLY, 312 TokenType.ASC, 313 TokenType.AUTO_INCREMENT, 314 TokenType.BEGIN, 315 TokenType.BPCHAR, 316 TokenType.CACHE, 317 TokenType.CASE, 318 TokenType.COLLATE, 319 TokenType.COMMAND, 320 TokenType.COMMENT, 321 TokenType.COMMIT, 322 TokenType.CONSTRAINT, 323 TokenType.DEFAULT, 324 TokenType.DELETE, 325 TokenType.DESC, 326 TokenType.DESCRIBE, 327 TokenType.DICTIONARY, 328 TokenType.DIV, 329 TokenType.END, 330 TokenType.EXECUTE, 331 TokenType.ESCAPE, 332 TokenType.FALSE, 333 TokenType.FIRST, 334 TokenType.FILTER, 335 TokenType.FINAL, 336 
TokenType.FORMAT, 337 TokenType.FULL, 338 TokenType.IS, 339 TokenType.ISNULL, 340 TokenType.INTERVAL, 341 TokenType.KEEP, 342 TokenType.KILL, 343 TokenType.LEFT, 344 TokenType.LOAD, 345 TokenType.MERGE, 346 TokenType.NATURAL, 347 TokenType.NEXT, 348 TokenType.OFFSET, 349 TokenType.OPERATOR, 350 TokenType.ORDINALITY, 351 TokenType.OVERLAPS, 352 TokenType.OVERWRITE, 353 TokenType.PARTITION, 354 TokenType.PERCENT, 355 TokenType.PIVOT, 356 TokenType.PRAGMA, 357 TokenType.RANGE, 358 TokenType.RECURSIVE, 359 TokenType.REFERENCES, 360 TokenType.REFRESH, 361 TokenType.REPLACE, 362 TokenType.RIGHT, 363 TokenType.ROW, 364 TokenType.ROWS, 365 TokenType.SEMI, 366 TokenType.SET, 367 TokenType.SETTINGS, 368 TokenType.SHOW, 369 TokenType.TEMPORARY, 370 TokenType.TOP, 371 TokenType.TRUE, 372 TokenType.UNIQUE, 373 TokenType.UNPIVOT, 374 TokenType.UPDATE, 375 TokenType.USE, 376 TokenType.VOLATILE, 377 TokenType.WINDOW, 378 *CREATABLES, 379 *SUBQUERY_PREDICATES, 380 *TYPE_TOKENS, 381 *NO_PAREN_FUNCTIONS, 382 } 383 384 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 385 386 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 387 TokenType.ANTI, 388 TokenType.APPLY, 389 TokenType.ASOF, 390 TokenType.FULL, 391 TokenType.LEFT, 392 TokenType.LOCK, 393 TokenType.NATURAL, 394 TokenType.OFFSET, 395 TokenType.RIGHT, 396 TokenType.SEMI, 397 TokenType.WINDOW, 398 } 399 400 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 401 402 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 403 404 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 405 406 FUNC_TOKENS = { 407 TokenType.COLLATE, 408 TokenType.COMMAND, 409 TokenType.CURRENT_DATE, 410 TokenType.CURRENT_DATETIME, 411 TokenType.CURRENT_TIMESTAMP, 412 TokenType.CURRENT_TIME, 413 TokenType.CURRENT_USER, 414 TokenType.FILTER, 415 TokenType.FIRST, 416 TokenType.FORMAT, 417 TokenType.GLOB, 418 TokenType.IDENTIFIER, 419 TokenType.INDEX, 420 TokenType.ISNULL, 421 TokenType.ILIKE, 422 TokenType.INSERT, 423 TokenType.LIKE, 424 TokenType.MERGE, 425 
TokenType.OFFSET, 426 TokenType.PRIMARY_KEY, 427 TokenType.RANGE, 428 TokenType.REPLACE, 429 TokenType.RLIKE, 430 TokenType.ROW, 431 TokenType.UNNEST, 432 TokenType.VAR, 433 TokenType.LEFT, 434 TokenType.RIGHT, 435 TokenType.DATE, 436 TokenType.DATETIME, 437 TokenType.TABLE, 438 TokenType.TIMESTAMP, 439 TokenType.TIMESTAMPTZ, 440 TokenType.WINDOW, 441 TokenType.XOR, 442 *TYPE_TOKENS, 443 *SUBQUERY_PREDICATES, 444 } 445 446 CONJUNCTION = { 447 TokenType.AND: exp.And, 448 TokenType.OR: exp.Or, 449 } 450 451 EQUALITY = { 452 TokenType.COLON_EQ: exp.PropertyEQ, 453 TokenType.EQ: exp.EQ, 454 TokenType.NEQ: exp.NEQ, 455 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 456 } 457 458 COMPARISON = { 459 TokenType.GT: exp.GT, 460 TokenType.GTE: exp.GTE, 461 TokenType.LT: exp.LT, 462 TokenType.LTE: exp.LTE, 463 } 464 465 BITWISE = { 466 TokenType.AMP: exp.BitwiseAnd, 467 TokenType.CARET: exp.BitwiseXor, 468 TokenType.PIPE: exp.BitwiseOr, 469 } 470 471 TERM = { 472 TokenType.DASH: exp.Sub, 473 TokenType.PLUS: exp.Add, 474 TokenType.MOD: exp.Mod, 475 TokenType.COLLATE: exp.Collate, 476 } 477 478 FACTOR = { 479 TokenType.DIV: exp.IntDiv, 480 TokenType.LR_ARROW: exp.Distance, 481 TokenType.SLASH: exp.Div, 482 TokenType.STAR: exp.Mul, 483 } 484 485 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 486 487 TIMES = { 488 TokenType.TIME, 489 TokenType.TIMETZ, 490 } 491 492 TIMESTAMPS = { 493 TokenType.TIMESTAMP, 494 TokenType.TIMESTAMPTZ, 495 TokenType.TIMESTAMPLTZ, 496 *TIMES, 497 } 498 499 SET_OPERATIONS = { 500 TokenType.UNION, 501 TokenType.INTERSECT, 502 TokenType.EXCEPT, 503 } 504 505 JOIN_METHODS = { 506 TokenType.NATURAL, 507 TokenType.ASOF, 508 } 509 510 JOIN_SIDES = { 511 TokenType.LEFT, 512 TokenType.RIGHT, 513 TokenType.FULL, 514 } 515 516 JOIN_KINDS = { 517 TokenType.INNER, 518 TokenType.OUTER, 519 TokenType.CROSS, 520 TokenType.SEMI, 521 TokenType.ANTI, 522 } 523 524 JOIN_HINTS: t.Set[str] = set() 525 526 LAMBDAS = { 527 TokenType.ARROW: lambda self, expressions: 
self.expression( 528 exp.Lambda, 529 this=self._replace_lambda( 530 self._parse_conjunction(), 531 {node.name for node in expressions}, 532 ), 533 expressions=expressions, 534 ), 535 TokenType.FARROW: lambda self, expressions: self.expression( 536 exp.Kwarg, 537 this=exp.var(expressions[0].name), 538 expression=self._parse_conjunction(), 539 ), 540 } 541 542 COLUMN_OPERATORS = { 543 TokenType.DOT: None, 544 TokenType.DCOLON: lambda self, this, to: self.expression( 545 exp.Cast if self.STRICT_CAST else exp.TryCast, 546 this=this, 547 to=to, 548 ), 549 TokenType.ARROW: lambda self, this, path: self.expression( 550 exp.JSONExtract, 551 this=this, 552 expression=self.dialect.to_json_path(path), 553 ), 554 TokenType.DARROW: lambda self, this, path: self.expression( 555 exp.JSONExtractScalar, 556 this=this, 557 expression=self.dialect.to_json_path(path), 558 ), 559 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 560 exp.JSONBExtract, 561 this=this, 562 expression=path, 563 ), 564 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 565 exp.JSONBExtractScalar, 566 this=this, 567 expression=path, 568 ), 569 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 570 exp.JSONBContains, 571 this=this, 572 expression=key, 573 ), 574 } 575 576 EXPRESSION_PARSERS = { 577 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 578 exp.Column: lambda self: self._parse_column(), 579 exp.Condition: lambda self: self._parse_conjunction(), 580 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 581 exp.Expression: lambda self: self._parse_statement(), 582 exp.From: lambda self: self._parse_from(), 583 exp.Group: lambda self: self._parse_group(), 584 exp.Having: lambda self: self._parse_having(), 585 exp.Identifier: lambda self: self._parse_id_var(), 586 exp.Join: lambda self: self._parse_join(), 587 exp.Lambda: lambda self: self._parse_lambda(), 588 exp.Lateral: lambda self: self._parse_lateral(), 589 
exp.Limit: lambda self: self._parse_limit(), 590 exp.Offset: lambda self: self._parse_offset(), 591 exp.Order: lambda self: self._parse_order(), 592 exp.Ordered: lambda self: self._parse_ordered(), 593 exp.Properties: lambda self: self._parse_properties(), 594 exp.Qualify: lambda self: self._parse_qualify(), 595 exp.Returning: lambda self: self._parse_returning(), 596 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 597 exp.Table: lambda self: self._parse_table_parts(), 598 exp.TableAlias: lambda self: self._parse_table_alias(), 599 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 600 exp.Where: lambda self: self._parse_where(), 601 exp.Window: lambda self: self._parse_named_window(), 602 exp.With: lambda self: self._parse_with(), 603 "JOIN_TYPE": lambda self: self._parse_join_parts(), 604 } 605 606 STATEMENT_PARSERS = { 607 TokenType.ALTER: lambda self: self._parse_alter(), 608 TokenType.BEGIN: lambda self: self._parse_transaction(), 609 TokenType.CACHE: lambda self: self._parse_cache(), 610 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 611 TokenType.COMMENT: lambda self: self._parse_comment(), 612 TokenType.CREATE: lambda self: self._parse_create(), 613 TokenType.DELETE: lambda self: self._parse_delete(), 614 TokenType.DESC: lambda self: self._parse_describe(), 615 TokenType.DESCRIBE: lambda self: self._parse_describe(), 616 TokenType.DROP: lambda self: self._parse_drop(), 617 TokenType.INSERT: lambda self: self._parse_insert(), 618 TokenType.KILL: lambda self: self._parse_kill(), 619 TokenType.LOAD: lambda self: self._parse_load(), 620 TokenType.MERGE: lambda self: self._parse_merge(), 621 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 622 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 623 TokenType.REFRESH: lambda self: self._parse_refresh(), 624 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 625 TokenType.SET: lambda self: 
self._parse_set(), 626 TokenType.UNCACHE: lambda self: self._parse_uncache(), 627 TokenType.UPDATE: lambda self: self._parse_update(), 628 TokenType.USE: lambda self: self.expression( 629 exp.Use, 630 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 631 and exp.var(self._prev.text), 632 this=self._parse_table(schema=False), 633 ), 634 } 635 636 UNARY_PARSERS = { 637 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 638 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 639 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 640 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 641 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 642 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 643 } 644 645 PRIMARY_PARSERS = { 646 TokenType.STRING: lambda self, token: self.expression( 647 exp.Literal, this=token.text, is_string=True 648 ), 649 TokenType.NUMBER: lambda self, token: self.expression( 650 exp.Literal, this=token.text, is_string=False 651 ), 652 TokenType.STAR: lambda self, _: self.expression( 653 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 654 ), 655 TokenType.NULL: lambda self, _: self.expression(exp.Null), 656 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 657 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 658 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 659 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 660 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 661 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 662 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 663 exp.National, 
this=token.text 664 ), 665 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 666 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 667 exp.RawString, this=token.text 668 ), 669 TokenType.UNICODE_STRING: lambda self, token: self.expression( 670 exp.UnicodeString, 671 this=token.text, 672 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 673 ), 674 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 675 } 676 677 PLACEHOLDER_PARSERS = { 678 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 679 TokenType.PARAMETER: lambda self: self._parse_parameter(), 680 TokenType.COLON: lambda self: ( 681 self.expression(exp.Placeholder, this=self._prev.text) 682 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 683 else None 684 ), 685 } 686 687 RANGE_PARSERS = { 688 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 689 TokenType.GLOB: binary_range_parser(exp.Glob), 690 TokenType.ILIKE: binary_range_parser(exp.ILike), 691 TokenType.IN: lambda self, this: self._parse_in(this), 692 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 693 TokenType.IS: lambda self, this: self._parse_is(this), 694 TokenType.LIKE: binary_range_parser(exp.Like), 695 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 696 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 697 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 698 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 699 } 700 701 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 702 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 703 "AUTO": lambda self: self._parse_auto_property(), 704 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 705 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 706 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 707 
"CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 708 "CHECKSUM": lambda self: self._parse_checksum(), 709 "CLUSTER BY": lambda self: self._parse_cluster(), 710 "CLUSTERED": lambda self: self._parse_clustered_by(), 711 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 712 exp.CollateProperty, **kwargs 713 ), 714 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 715 "CONTAINS": lambda self: self._parse_contains_property(), 716 "COPY": lambda self: self._parse_copy_property(), 717 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 718 "DEFINER": lambda self: self._parse_definer(), 719 "DETERMINISTIC": lambda self: self.expression( 720 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 721 ), 722 "DISTKEY": lambda self: self._parse_distkey(), 723 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 724 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 725 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 726 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 727 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 728 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 729 "FREESPACE": lambda self: self._parse_freespace(), 730 "HEAP": lambda self: self.expression(exp.HeapProperty), 731 "IMMUTABLE": lambda self: self.expression( 732 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 733 ), 734 "INHERITS": lambda self: self.expression( 735 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 736 ), 737 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 738 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 739 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 740 "LAYOUT": lambda self: 
self._parse_dict_property(this="LAYOUT"), 741 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 742 "LIKE": lambda self: self._parse_create_like(), 743 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 744 "LOCK": lambda self: self._parse_locking(), 745 "LOCKING": lambda self: self._parse_locking(), 746 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 747 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 748 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 749 "MODIFIES": lambda self: self._parse_modifies_property(), 750 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 751 "NO": lambda self: self._parse_no_property(), 752 "ON": lambda self: self._parse_on_property(), 753 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 754 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 755 "PARTITION": lambda self: self._parse_partitioned_of(), 756 "PARTITION BY": lambda self: self._parse_partitioned_by(), 757 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 758 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 759 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 760 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 761 "READS": lambda self: self._parse_reads_property(), 762 "REMOTE": lambda self: self._parse_remote_with_connection(), 763 "RETURNS": lambda self: self._parse_returns(), 764 "ROW": lambda self: self._parse_row(), 765 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 766 "SAMPLE": lambda self: self.expression( 767 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 768 ), 769 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 770 "SETTINGS": lambda self: self.expression( 771 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 772 ), 
773 "SORTKEY": lambda self: self._parse_sortkey(), 774 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 775 "STABLE": lambda self: self.expression( 776 exp.StabilityProperty, this=exp.Literal.string("STABLE") 777 ), 778 "STORED": lambda self: self._parse_stored(), 779 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 780 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 781 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 782 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 783 "TO": lambda self: self._parse_to_table(), 784 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 785 "TRANSFORM": lambda self: self.expression( 786 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 787 ), 788 "TTL": lambda self: self._parse_ttl(), 789 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 790 "VOLATILE": lambda self: self._parse_volatile_property(), 791 "WITH": lambda self: self._parse_with_property(), 792 } 793 794 CONSTRAINT_PARSERS = { 795 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 796 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 797 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 798 "CHARACTER SET": lambda self: self.expression( 799 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 800 ), 801 "CHECK": lambda self: self.expression( 802 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 803 ), 804 "COLLATE": lambda self: self.expression( 805 exp.CollateColumnConstraint, this=self._parse_var() 806 ), 807 "COMMENT": lambda self: self.expression( 808 exp.CommentColumnConstraint, this=self._parse_string() 809 ), 810 "COMPRESS": lambda self: self._parse_compress(), 811 "CLUSTERED": lambda self: self.expression( 812 exp.ClusteredColumnConstraint, 
this=self._parse_wrapped_csv(self._parse_ordered) 813 ), 814 "NONCLUSTERED": lambda self: self.expression( 815 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 816 ), 817 "DEFAULT": lambda self: self.expression( 818 exp.DefaultColumnConstraint, this=self._parse_bitwise() 819 ), 820 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 821 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 822 "FORMAT": lambda self: self.expression( 823 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 824 ), 825 "GENERATED": lambda self: self._parse_generated_as_identity(), 826 "IDENTITY": lambda self: self._parse_auto_increment(), 827 "INLINE": lambda self: self._parse_inline(), 828 "LIKE": lambda self: self._parse_create_like(), 829 "NOT": lambda self: self._parse_not_constraint(), 830 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 831 "ON": lambda self: ( 832 self._match(TokenType.UPDATE) 833 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 834 ) 835 or self.expression(exp.OnProperty, this=self._parse_id_var()), 836 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 837 "PERIOD": lambda self: self._parse_period_for_system_time(), 838 "PRIMARY KEY": lambda self: self._parse_primary_key(), 839 "REFERENCES": lambda self: self._parse_references(match=False), 840 "TITLE": lambda self: self.expression( 841 exp.TitleColumnConstraint, this=self._parse_var_or_string() 842 ), 843 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 844 "UNIQUE": lambda self: self._parse_unique(), 845 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 846 "WITH": lambda self: self.expression( 847 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 848 ), 849 } 850 851 ALTER_PARSERS = { 852 "ADD": lambda self: 
self._parse_alter_table_add(), 853 "ALTER": lambda self: self._parse_alter_table_alter(), 854 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 855 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 856 "DROP": lambda self: self._parse_alter_table_drop(), 857 "RENAME": lambda self: self._parse_alter_table_rename(), 858 } 859 860 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"} 861 862 NO_PAREN_FUNCTION_PARSERS = { 863 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 864 "CASE": lambda self: self._parse_case(), 865 "IF": lambda self: self._parse_if(), 866 "NEXT": lambda self: self._parse_next_value_for(), 867 } 868 869 INVALID_FUNC_NAME_TOKENS = { 870 TokenType.IDENTIFIER, 871 TokenType.STRING, 872 } 873 874 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 875 876 FUNCTION_PARSERS = { 877 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 878 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 879 "DECODE": lambda self: self._parse_decode(), 880 "EXTRACT": lambda self: self._parse_extract(), 881 "JSON_OBJECT": lambda self: self._parse_json_object(), 882 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 883 "JSON_TABLE": lambda self: self._parse_json_table(), 884 "MATCH": lambda self: self._parse_match_against(), 885 "OPENJSON": lambda self: self._parse_open_json(), 886 "POSITION": lambda self: self._parse_position(), 887 "PREDICT": lambda self: self._parse_predict(), 888 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 889 "STRING_AGG": lambda self: self._parse_string_agg(), 890 "SUBSTRING": lambda self: self._parse_substring(), 891 "TRIM": lambda self: self._parse_trim(), 892 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 893 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 894 } 895 896 QUERY_MODIFIER_PARSERS = { 897 TokenType.MATCH_RECOGNIZE: lambda self: ("match", 
self._parse_match_recognize()), 898 TokenType.WHERE: lambda self: ("where", self._parse_where()), 899 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 900 TokenType.HAVING: lambda self: ("having", self._parse_having()), 901 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 902 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 903 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 904 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 905 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 906 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 907 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 908 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 909 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 910 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 911 TokenType.CLUSTER_BY: lambda self: ( 912 "cluster", 913 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 914 ), 915 TokenType.DISTRIBUTE_BY: lambda self: ( 916 "distribute", 917 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 918 ), 919 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 920 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 921 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 922 } 923 924 SET_PARSERS = { 925 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 926 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 927 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 928 "TRANSACTION": lambda self: self._parse_set_transaction(), 929 } 930 931 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 932 933 TYPE_LITERAL_PARSERS = { 934 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 935 } 936 937 MODIFIABLES 
    # Multi-word transaction characteristics (isolation levels and access modes);
    # presumably consumed by _parse_set_transaction — that parser is not visible
    # in this chunk, confirm before relying on this.
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }
"=") for assignments 993 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 994 995 # Whether the TRIM function expects the characters to trim as its first argument 996 TRIM_PATTERN_FIRST = False 997 998 # Whether or not string aliases are supported `SELECT COUNT(*) 'count'` 999 STRING_ALIASES = False 1000 1001 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1002 MODIFIERS_ATTACHED_TO_UNION = True 1003 UNION_MODIFIERS = {"order", "limit", "offset"} 1004 1005 # Parses no parenthesis if statements as commands 1006 NO_PAREN_IF_COMMANDS = True 1007 1008 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1009 # If this is True and '(' is not found, the keyword will be treated as an identifier 1010 VALUES_FOLLOWED_BY_PAREN = True 1011 1012 __slots__ = ( 1013 "error_level", 1014 "error_message_context", 1015 "max_errors", 1016 "dialect", 1017 "sql", 1018 "errors", 1019 "_tokens", 1020 "_index", 1021 "_curr", 1022 "_next", 1023 "_prev", 1024 "_prev_comments", 1025 ) 1026 1027 # Autofilled 1028 SHOW_TRIE: t.Dict = {} 1029 SET_TRIE: t.Dict = {} 1030 1031 def __init__( 1032 self, 1033 error_level: t.Optional[ErrorLevel] = None, 1034 error_message_context: int = 100, 1035 max_errors: int = 3, 1036 dialect: DialectType = None, 1037 ): 1038 from sqlglot.dialects import Dialect 1039 1040 self.error_level = error_level or ErrorLevel.IMMEDIATE 1041 self.error_message_context = error_message_context 1042 self.max_errors = max_errors 1043 self.dialect = Dialect.get_or_raise(dialect) 1044 self.reset() 1045 1046 def reset(self): 1047 self.sql = "" 1048 self.errors = [] 1049 self._tokens = [] 1050 self._index = 0 1051 self._curr = None 1052 self._next = None 1053 self._prev = None 1054 self._prev_comments = None 1055 1056 def parse( 1057 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1058 ) -> t.List[t.Optional[exp.Expression]]: 1059 """ 1060 Parses a list of tokens and returns a list of syntax trees, 
one tree 1061 per parsed SQL statement. 1062 1063 Args: 1064 raw_tokens: The list of tokens. 1065 sql: The original SQL string, used to produce helpful debug messages. 1066 1067 Returns: 1068 The list of the produced syntax trees. 1069 """ 1070 return self._parse( 1071 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1072 ) 1073 1074 def parse_into( 1075 self, 1076 expression_types: exp.IntoType, 1077 raw_tokens: t.List[Token], 1078 sql: t.Optional[str] = None, 1079 ) -> t.List[t.Optional[exp.Expression]]: 1080 """ 1081 Parses a list of tokens into a given Expression type. If a collection of Expression 1082 types is given instead, this method will try to parse the token list into each one 1083 of them, stopping at the first for which the parsing succeeds. 1084 1085 Args: 1086 expression_types: The expression type(s) to try and parse the token list into. 1087 raw_tokens: The list of tokens. 1088 sql: The original SQL string, used to produce helpful debug messages. 1089 1090 Returns: 1091 The target Expression. 
1092 """ 1093 errors = [] 1094 for expression_type in ensure_list(expression_types): 1095 parser = self.EXPRESSION_PARSERS.get(expression_type) 1096 if not parser: 1097 raise TypeError(f"No parser registered for {expression_type}") 1098 1099 try: 1100 return self._parse(parser, raw_tokens, sql) 1101 except ParseError as e: 1102 e.errors[0]["into_expression"] = expression_type 1103 errors.append(e) 1104 1105 raise ParseError( 1106 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1107 errors=merge_errors(errors), 1108 ) from errors[-1] 1109 1110 def _parse( 1111 self, 1112 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1113 raw_tokens: t.List[Token], 1114 sql: t.Optional[str] = None, 1115 ) -> t.List[t.Optional[exp.Expression]]: 1116 self.reset() 1117 self.sql = sql or "" 1118 1119 total = len(raw_tokens) 1120 chunks: t.List[t.List[Token]] = [[]] 1121 1122 for i, token in enumerate(raw_tokens): 1123 if token.token_type == TokenType.SEMICOLON: 1124 if i < total - 1: 1125 chunks.append([]) 1126 else: 1127 chunks[-1].append(token) 1128 1129 expressions = [] 1130 1131 for tokens in chunks: 1132 self._index = -1 1133 self._tokens = tokens 1134 self._advance() 1135 1136 expressions.append(parse_method(self)) 1137 1138 if self._index < len(self._tokens): 1139 self.raise_error("Invalid expression / Unexpected token") 1140 1141 self.check_errors() 1142 1143 return expressions 1144 1145 def check_errors(self) -> None: 1146 """Logs or raises any found errors, depending on the chosen error level setting.""" 1147 if self.error_level == ErrorLevel.WARN: 1148 for error in self.errors: 1149 logger.error(str(error)) 1150 elif self.error_level == ErrorLevel.RAISE and self.errors: 1151 raise ParseError( 1152 concat_messages(self.errors, self.max_errors), 1153 errors=merge_errors(self.errors), 1154 ) 1155 1156 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1157 """ 1158 Appends an error in the list of recorded errors or 
raises it, depending on the chosen 1159 error level setting. 1160 """ 1161 token = token or self._curr or self._prev or Token.string("") 1162 start = token.start 1163 end = token.end + 1 1164 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1165 highlight = self.sql[start:end] 1166 end_context = self.sql[end : end + self.error_message_context] 1167 1168 error = ParseError.new( 1169 f"{message}. Line {token.line}, Col: {token.col}.\n" 1170 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1171 description=message, 1172 line=token.line, 1173 col=token.col, 1174 start_context=start_context, 1175 highlight=highlight, 1176 end_context=end_context, 1177 ) 1178 1179 if self.error_level == ErrorLevel.IMMEDIATE: 1180 raise error 1181 1182 self.errors.append(error) 1183 1184 def expression( 1185 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1186 ) -> E: 1187 """ 1188 Creates a new, validated Expression. 1189 1190 Args: 1191 exp_class: The expression class to instantiate. 1192 comments: An optional list of comments to attach to the expression. 1193 kwargs: The arguments to set for the expression along with their respective values. 1194 1195 Returns: 1196 The target expression. 1197 """ 1198 instance = exp_class(**kwargs) 1199 instance.add_comments(comments) if comments else self._add_comments(instance) 1200 return self.validate_expression(instance) 1201 1202 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1203 if expression and self._prev_comments: 1204 expression.add_comments(self._prev_comments) 1205 self._prev_comments = None 1206 1207 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1208 """ 1209 Validates an Expression, making sure that all its mandatory arguments are set. 1210 1211 Args: 1212 expression: The expression to validate. 1213 args: An optional list of items that was used to instantiate the expression, if it's a Func. 
1214 1215 Returns: 1216 The validated expression. 1217 """ 1218 if self.error_level != ErrorLevel.IGNORE: 1219 for error_message in expression.error_messages(args): 1220 self.raise_error(error_message) 1221 1222 return expression 1223 1224 def _find_sql(self, start: Token, end: Token) -> str: 1225 return self.sql[start.start : end.end + 1] 1226 1227 def _is_connected(self) -> bool: 1228 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1229 1230 def _advance(self, times: int = 1) -> None: 1231 self._index += times 1232 self._curr = seq_get(self._tokens, self._index) 1233 self._next = seq_get(self._tokens, self._index + 1) 1234 1235 if self._index > 0: 1236 self._prev = self._tokens[self._index - 1] 1237 self._prev_comments = self._prev.comments 1238 else: 1239 self._prev = None 1240 self._prev_comments = None 1241 1242 def _retreat(self, index: int) -> None: 1243 if index != self._index: 1244 self._advance(index - self._index) 1245 1246 def _warn_unsupported(self) -> None: 1247 if len(self._tokens) <= 1: 1248 return 1249 1250 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1251 # interested in emitting a warning for the one being currently processed. 1252 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1253 1254 logger.warning( 1255 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 
1256 ) 1257 1258 def _parse_command(self) -> exp.Command: 1259 self._warn_unsupported() 1260 return self.expression( 1261 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1262 ) 1263 1264 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1265 start = self._prev 1266 exists = self._parse_exists() if allow_exists else None 1267 1268 self._match(TokenType.ON) 1269 1270 kind = self._match_set(self.CREATABLES) and self._prev 1271 if not kind: 1272 return self._parse_as_command(start) 1273 1274 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1275 this = self._parse_user_defined_function(kind=kind.token_type) 1276 elif kind.token_type == TokenType.TABLE: 1277 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1278 elif kind.token_type == TokenType.COLUMN: 1279 this = self._parse_column() 1280 else: 1281 this = self._parse_id_var() 1282 1283 self._match(TokenType.IS) 1284 1285 return self.expression( 1286 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1287 ) 1288 1289 def _parse_to_table( 1290 self, 1291 ) -> exp.ToTableProperty: 1292 table = self._parse_table_parts(schema=True) 1293 return self.expression(exp.ToTableProperty, this=table) 1294 1295 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1296 def _parse_ttl(self) -> exp.Expression: 1297 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1298 this = self._parse_bitwise() 1299 1300 if self._match_text_seq("DELETE"): 1301 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1302 if self._match_text_seq("RECOMPRESS"): 1303 return self.expression( 1304 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1305 ) 1306 if self._match_text_seq("TO", "DISK"): 1307 return self.expression( 1308 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1309 ) 1310 if self._match_text_seq("TO", "VOLUME"): 1311 
    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """
        Parses a DROP statement: DROP [TEMPORARY] [MATERIALIZED] <kind> ...

        Falls back to a generic Command when the dropped object kind is not
        one of CREATABLES.

        Args:
            exists: Whether an IF EXISTS clause was already consumed by the caller.
        """
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(
                # A SCHEMA kind means the name references a database-level
                # object rather than a table.
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            ),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )
    def _parse_create(self) -> exp.Create | exp.Command:
        """
        Parses a CREATE statement into exp.Create, covering functions/procedures,
        indexes, and database-level objects (tables, views, schemas, ...).

        Falls back to a generic Command when the created object kind cannot be
        determined, or when trailing tokens remain unconsumed at the end.
        """
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        # Accumulates property lists parsed at the various syntactic locations
        # into the single `properties` node.
        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )
self._match_text_seq("COMPOUND", "SORTKEY"): 1551 return self._parse_sortkey(compound=True) 1552 1553 if self._match_text_seq("SQL", "SECURITY"): 1554 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1555 1556 index = self._index 1557 key = self._parse_column() 1558 1559 if not self._match(TokenType.EQ): 1560 self._retreat(index) 1561 return None 1562 1563 return self.expression( 1564 exp.Property, 1565 this=key.to_dot() if isinstance(key, exp.Column) else key, 1566 value=self._parse_column() or self._parse_var(any_token=True), 1567 ) 1568 1569 def _parse_stored(self) -> exp.FileFormatProperty: 1570 self._match(TokenType.ALIAS) 1571 1572 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1573 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1574 1575 return self.expression( 1576 exp.FileFormatProperty, 1577 this=( 1578 self.expression( 1579 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1580 ) 1581 if input_format or output_format 1582 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1583 ), 1584 ) 1585 1586 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1587 self._match(TokenType.EQ) 1588 self._match(TokenType.ALIAS) 1589 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1590 1591 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1592 properties = [] 1593 while True: 1594 if before: 1595 prop = self._parse_property_before() 1596 else: 1597 prop = self._parse_property() 1598 1599 if not prop: 1600 break 1601 for p in ensure_list(prop): 1602 properties.append(p) 1603 1604 if properties: 1605 return self.expression(exp.Properties, expressions=properties) 1606 1607 return None 1608 1609 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1610 return self.expression( 1611 
exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1612 ) 1613 1614 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1615 if self._index >= 2: 1616 pre_volatile_token = self._tokens[self._index - 2] 1617 else: 1618 pre_volatile_token = None 1619 1620 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1621 return exp.VolatileProperty() 1622 1623 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1624 1625 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1626 self._match_pair(TokenType.EQ, TokenType.ON) 1627 1628 prop = self.expression(exp.WithSystemVersioningProperty) 1629 if self._match(TokenType.L_PAREN): 1630 self._match_text_seq("HISTORY_TABLE", "=") 1631 prop.set("this", self._parse_table_parts()) 1632 1633 if self._match(TokenType.COMMA): 1634 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1635 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1636 1637 self._match_r_paren() 1638 1639 return prop 1640 1641 def _parse_with_property( 1642 self, 1643 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1644 if self._match(TokenType.L_PAREN, advance=False): 1645 return self._parse_wrapped_csv(self._parse_property) 1646 1647 if self._match_text_seq("JOURNAL"): 1648 return self._parse_withjournaltable() 1649 1650 if self._match_text_seq("DATA"): 1651 return self._parse_withdata(no=False) 1652 elif self._match_text_seq("NO", "DATA"): 1653 return self._parse_withdata(no=True) 1654 1655 if not self._next: 1656 return None 1657 1658 return self._parse_withisolatedloading() 1659 1660 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1661 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1662 self._match(TokenType.EQ) 1663 1664 user = self._parse_id_var() 1665 self._match(TokenType.PARAMETER) 1666 host = self._parse_id_var() or (self._match(TokenType.MOD) and 
    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """
        Parses a CLUSTERED BY (cols) [SORTED BY (orderings)] INTO n BUCKETS
        property (Hive-style bucketing syntax).
        """
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )
self._retreat(self._index - 1) 1733 return None 1734 1735 return self.expression(exp.CopyGrantsProperty) 1736 1737 def _parse_freespace(self) -> exp.FreespaceProperty: 1738 self._match(TokenType.EQ) 1739 return self.expression( 1740 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1741 ) 1742 1743 def _parse_mergeblockratio( 1744 self, no: bool = False, default: bool = False 1745 ) -> exp.MergeBlockRatioProperty: 1746 if self._match(TokenType.EQ): 1747 return self.expression( 1748 exp.MergeBlockRatioProperty, 1749 this=self._parse_number(), 1750 percent=self._match(TokenType.PERCENT), 1751 ) 1752 1753 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1754 1755 def _parse_datablocksize( 1756 self, 1757 default: t.Optional[bool] = None, 1758 minimum: t.Optional[bool] = None, 1759 maximum: t.Optional[bool] = None, 1760 ) -> exp.DataBlocksizeProperty: 1761 self._match(TokenType.EQ) 1762 size = self._parse_number() 1763 1764 units = None 1765 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1766 units = self._prev.text 1767 1768 return self.expression( 1769 exp.DataBlocksizeProperty, 1770 size=size, 1771 units=units, 1772 default=default, 1773 minimum=minimum, 1774 maximum=maximum, 1775 ) 1776 1777 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1778 self._match(TokenType.EQ) 1779 always = self._match_text_seq("ALWAYS") 1780 manual = self._match_text_seq("MANUAL") 1781 never = self._match_text_seq("NEVER") 1782 default = self._match_text_seq("DEFAULT") 1783 1784 autotemp = None 1785 if self._match_text_seq("AUTOTEMP"): 1786 autotemp = self._parse_schema() 1787 1788 return self.expression( 1789 exp.BlockCompressionProperty, 1790 always=always, 1791 manual=manual, 1792 never=never, 1793 default=default, 1794 autotemp=autotemp, 1795 ) 1796 1797 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1798 no = self._match_text_seq("NO") 1799 concurrent = 
self._match_text_seq("CONCURRENT") 1800 self._match_text_seq("ISOLATED", "LOADING") 1801 for_all = self._match_text_seq("FOR", "ALL") 1802 for_insert = self._match_text_seq("FOR", "INSERT") 1803 for_none = self._match_text_seq("FOR", "NONE") 1804 return self.expression( 1805 exp.IsolatedLoadingProperty, 1806 no=no, 1807 concurrent=concurrent, 1808 for_all=for_all, 1809 for_insert=for_insert, 1810 for_none=for_none, 1811 ) 1812 1813 def _parse_locking(self) -> exp.LockingProperty: 1814 if self._match(TokenType.TABLE): 1815 kind = "TABLE" 1816 elif self._match(TokenType.VIEW): 1817 kind = "VIEW" 1818 elif self._match(TokenType.ROW): 1819 kind = "ROW" 1820 elif self._match_text_seq("DATABASE"): 1821 kind = "DATABASE" 1822 else: 1823 kind = None 1824 1825 if kind in ("DATABASE", "TABLE", "VIEW"): 1826 this = self._parse_table_parts() 1827 else: 1828 this = None 1829 1830 if self._match(TokenType.FOR): 1831 for_or_in = "FOR" 1832 elif self._match(TokenType.IN): 1833 for_or_in = "IN" 1834 else: 1835 for_or_in = None 1836 1837 if self._match_text_seq("ACCESS"): 1838 lock_type = "ACCESS" 1839 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1840 lock_type = "EXCLUSIVE" 1841 elif self._match_text_seq("SHARE"): 1842 lock_type = "SHARE" 1843 elif self._match_text_seq("READ"): 1844 lock_type = "READ" 1845 elif self._match_text_seq("WRITE"): 1846 lock_type = "WRITE" 1847 elif self._match_text_seq("CHECKSUM"): 1848 lock_type = "CHECKSUM" 1849 else: 1850 lock_type = None 1851 1852 override = self._match_text_seq("OVERRIDE") 1853 1854 return self.expression( 1855 exp.LockingProperty, 1856 this=this, 1857 kind=kind, 1858 for_or_in=for_or_in, 1859 lock_type=lock_type, 1860 override=override, 1861 ) 1862 1863 def _parse_partition_by(self) -> t.List[exp.Expression]: 1864 if self._match(TokenType.PARTITION_BY): 1865 return self._parse_csv(self._parse_conjunction) 1866 return [] 1867 1868 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 1869 def 
    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """
        Parses PostgreSQL's PARTITION OF clause:
        PARTITION OF parent { DEFAULT | FOR VALUES <partition bound spec> }.

        Returns None (after backtracking one token) when OF does not follow,
        so the caller can try another interpretation of the keyword.
        """
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)
1923 exp.PartitionedByProperty, 1924 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1925 ) 1926 1927 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1928 if self._match_text_seq("AND", "STATISTICS"): 1929 statistics = True 1930 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1931 statistics = False 1932 else: 1933 statistics = None 1934 1935 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1936 1937 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1938 if self._match_text_seq("SQL"): 1939 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 1940 return None 1941 1942 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1943 if self._match_text_seq("SQL", "DATA"): 1944 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 1945 return None 1946 1947 def _parse_no_property(self) -> t.Optional[exp.Expression]: 1948 if self._match_text_seq("PRIMARY", "INDEX"): 1949 return exp.NoPrimaryIndexProperty() 1950 if self._match_text_seq("SQL"): 1951 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 1952 return None 1953 1954 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1955 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1956 return exp.OnCommitProperty() 1957 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1958 return exp.OnCommitProperty(delete=True) 1959 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1960 1961 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1962 if self._match_text_seq("SQL", "DATA"): 1963 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 1964 return None 1965 1966 def _parse_distkey(self) -> exp.DistKeyProperty: 1967 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1968 1969 def _parse_create_like(self) -> 
t.Optional[exp.LikeProperty]: 1970 table = self._parse_table(schema=True) 1971 1972 options = [] 1973 while self._match_texts(("INCLUDING", "EXCLUDING")): 1974 this = self._prev.text.upper() 1975 1976 id_var = self._parse_id_var() 1977 if not id_var: 1978 return None 1979 1980 options.append( 1981 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1982 ) 1983 1984 return self.expression(exp.LikeProperty, this=table, expressions=options) 1985 1986 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1987 return self.expression( 1988 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1989 ) 1990 1991 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1992 self._match(TokenType.EQ) 1993 return self.expression( 1994 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1995 ) 1996 1997 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 1998 self._match_text_seq("WITH", "CONNECTION") 1999 return self.expression( 2000 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2001 ) 2002 2003 def _parse_returns(self) -> exp.ReturnsProperty: 2004 value: t.Optional[exp.Expression] 2005 is_table = self._match(TokenType.TABLE) 2006 2007 if is_table: 2008 if self._match(TokenType.LT): 2009 value = self.expression( 2010 exp.Schema, 2011 this="TABLE", 2012 expressions=self._parse_csv(self._parse_struct_types), 2013 ) 2014 if not self._match(TokenType.GT): 2015 self.raise_error("Expecting >") 2016 else: 2017 value = self._parse_schema(exp.var("TABLE")) 2018 else: 2019 value = self._parse_types() 2020 2021 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 2022 2023 def _parse_describe(self) -> exp.Describe: 2024 kind = self._match_set(self.CREATABLES) and self._prev.text 2025 extended = self._match_text_seq("EXTENDED") 2026 this = self._parse_table(schema=True) 2027 properties = 
self._parse_properties() 2028 expressions = properties.expressions if properties else None 2029 return self.expression( 2030 exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions 2031 ) 2032 2033 def _parse_insert(self) -> exp.Insert: 2034 comments = ensure_list(self._prev_comments) 2035 overwrite = self._match(TokenType.OVERWRITE) 2036 ignore = self._match(TokenType.IGNORE) 2037 local = self._match_text_seq("LOCAL") 2038 alternative = None 2039 2040 if self._match_text_seq("DIRECTORY"): 2041 this: t.Optional[exp.Expression] = self.expression( 2042 exp.Directory, 2043 this=self._parse_var_or_string(), 2044 local=local, 2045 row_format=self._parse_row_format(match_row=True), 2046 ) 2047 else: 2048 if self._match(TokenType.OR): 2049 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2050 2051 self._match(TokenType.INTO) 2052 comments += ensure_list(self._prev_comments) 2053 self._match(TokenType.TABLE) 2054 this = self._parse_table(schema=True) 2055 2056 returning = self._parse_returning() 2057 2058 return self.expression( 2059 exp.Insert, 2060 comments=comments, 2061 this=this, 2062 by_name=self._match_text_seq("BY", "NAME"), 2063 exists=self._parse_exists(), 2064 partition=self._parse_partition(), 2065 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2066 and self._parse_conjunction(), 2067 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2068 conflict=self._parse_on_conflict(), 2069 returning=returning or self._parse_returning(), 2070 overwrite=overwrite, 2071 alternative=alternative, 2072 ignore=ignore, 2073 ) 2074 2075 def _parse_kill(self) -> exp.Kill: 2076 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2077 2078 return self.expression( 2079 exp.Kill, 2080 this=self._parse_primary(), 2081 kind=kind, 2082 ) 2083 2084 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2085 conflict = self._match_text_seq("ON", "CONFLICT") 2086 
duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2087 2088 if not conflict and not duplicate: 2089 return None 2090 2091 nothing = None 2092 expressions = None 2093 key = None 2094 constraint = None 2095 2096 if conflict: 2097 if self._match_text_seq("ON", "CONSTRAINT"): 2098 constraint = self._parse_id_var() 2099 else: 2100 key = self._parse_csv(self._parse_value) 2101 2102 self._match_text_seq("DO") 2103 if self._match_text_seq("NOTHING"): 2104 nothing = True 2105 else: 2106 self._match(TokenType.UPDATE) 2107 self._match(TokenType.SET) 2108 expressions = self._parse_csv(self._parse_equality) 2109 2110 return self.expression( 2111 exp.OnConflict, 2112 duplicate=duplicate, 2113 expressions=expressions, 2114 nothing=nothing, 2115 key=key, 2116 constraint=constraint, 2117 ) 2118 2119 def _parse_returning(self) -> t.Optional[exp.Returning]: 2120 if not self._match(TokenType.RETURNING): 2121 return None 2122 return self.expression( 2123 exp.Returning, 2124 expressions=self._parse_csv(self._parse_expression), 2125 into=self._match(TokenType.INTO) and self._parse_table_part(), 2126 ) 2127 2128 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2129 if not self._match(TokenType.FORMAT): 2130 return None 2131 return self._parse_row_format() 2132 2133 def _parse_row_format( 2134 self, match_row: bool = False 2135 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2136 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2137 return None 2138 2139 if self._match_text_seq("SERDE"): 2140 this = self._parse_string() 2141 2142 serde_properties = None 2143 if self._match(TokenType.SERDE_PROPERTIES): 2144 serde_properties = self.expression( 2145 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 2146 ) 2147 2148 return self.expression( 2149 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2150 ) 2151 2152 
self._match_text_seq("DELIMITED") 2153 2154 kwargs = {} 2155 2156 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2157 kwargs["fields"] = self._parse_string() 2158 if self._match_text_seq("ESCAPED", "BY"): 2159 kwargs["escaped"] = self._parse_string() 2160 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2161 kwargs["collection_items"] = self._parse_string() 2162 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2163 kwargs["map_keys"] = self._parse_string() 2164 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2165 kwargs["lines"] = self._parse_string() 2166 if self._match_text_seq("NULL", "DEFINED", "AS"): 2167 kwargs["null"] = self._parse_string() 2168 2169 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2170 2171 def _parse_load(self) -> exp.LoadData | exp.Command: 2172 if self._match_text_seq("DATA"): 2173 local = self._match_text_seq("LOCAL") 2174 self._match_text_seq("INPATH") 2175 inpath = self._parse_string() 2176 overwrite = self._match(TokenType.OVERWRITE) 2177 self._match_pair(TokenType.INTO, TokenType.TABLE) 2178 2179 return self.expression( 2180 exp.LoadData, 2181 this=self._parse_table(schema=True), 2182 local=local, 2183 overwrite=overwrite, 2184 inpath=inpath, 2185 partition=self._parse_partition(), 2186 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2187 serde=self._match_text_seq("SERDE") and self._parse_string(), 2188 ) 2189 return self._parse_as_command(self._prev) 2190 2191 def _parse_delete(self) -> exp.Delete: 2192 # This handles MySQL's "Multiple-Table Syntax" 2193 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2194 tables = None 2195 comments = self._prev_comments 2196 if not self._match(TokenType.FROM, advance=False): 2197 tables = self._parse_csv(self._parse_table) or None 2198 2199 returning = self._parse_returning() 2200 2201 return self.expression( 2202 exp.Delete, 2203 comments=comments, 2204 tables=tables, 2205 
this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2206 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2207 where=self._parse_where(), 2208 returning=returning or self._parse_returning(), 2209 limit=self._parse_limit(), 2210 ) 2211 2212 def _parse_update(self) -> exp.Update: 2213 comments = self._prev_comments 2214 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2215 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2216 returning = self._parse_returning() 2217 return self.expression( 2218 exp.Update, 2219 comments=comments, 2220 **{ # type: ignore 2221 "this": this, 2222 "expressions": expressions, 2223 "from": self._parse_from(joins=True), 2224 "where": self._parse_where(), 2225 "returning": returning or self._parse_returning(), 2226 "order": self._parse_order(), 2227 "limit": self._parse_limit(), 2228 }, 2229 ) 2230 2231 def _parse_uncache(self) -> exp.Uncache: 2232 if not self._match(TokenType.TABLE): 2233 self.raise_error("Expecting TABLE after UNCACHE") 2234 2235 return self.expression( 2236 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2237 ) 2238 2239 def _parse_cache(self) -> exp.Cache: 2240 lazy = self._match_text_seq("LAZY") 2241 self._match(TokenType.TABLE) 2242 table = self._parse_table(schema=True) 2243 2244 options = [] 2245 if self._match_text_seq("OPTIONS"): 2246 self._match_l_paren() 2247 k = self._parse_string() 2248 self._match(TokenType.EQ) 2249 v = self._parse_string() 2250 options = [k, v] 2251 self._match_r_paren() 2252 2253 self._match(TokenType.ALIAS) 2254 return self.expression( 2255 exp.Cache, 2256 this=table, 2257 lazy=lazy, 2258 options=options, 2259 expression=self._parse_select(nested=True), 2260 ) 2261 2262 def _parse_partition(self) -> t.Optional[exp.Partition]: 2263 if not self._match(TokenType.PARTITION): 2264 return None 2265 2266 return self.expression( 2267 exp.Partition, 
expressions=self._parse_wrapped_csv(self._parse_conjunction) 2268 ) 2269 2270 def _parse_value(self) -> exp.Tuple: 2271 if self._match(TokenType.L_PAREN): 2272 expressions = self._parse_csv(self._parse_expression) 2273 self._match_r_paren() 2274 return self.expression(exp.Tuple, expressions=expressions) 2275 2276 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2277 return self.expression(exp.Tuple, expressions=[self._parse_expression()]) 2278 2279 def _parse_projections(self) -> t.List[exp.Expression]: 2280 return self._parse_expressions() 2281 2282 def _parse_select( 2283 self, 2284 nested: bool = False, 2285 table: bool = False, 2286 parse_subquery_alias: bool = True, 2287 parse_set_operation: bool = True, 2288 ) -> t.Optional[exp.Expression]: 2289 cte = self._parse_with() 2290 2291 if cte: 2292 this = self._parse_statement() 2293 2294 if not this: 2295 self.raise_error("Failed to parse any statement following CTE") 2296 return cte 2297 2298 if "with" in this.arg_types: 2299 this.set("with", cte) 2300 else: 2301 self.raise_error(f"{this.key} does not support CTE") 2302 this = cte 2303 2304 return this 2305 2306 # duckdb supports leading with FROM x 2307 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2308 2309 if self._match(TokenType.SELECT): 2310 comments = self._prev_comments 2311 2312 hint = self._parse_hint() 2313 all_ = self._match(TokenType.ALL) 2314 distinct = self._match_set(self.DISTINCT_TOKENS) 2315 2316 kind = ( 2317 self._match(TokenType.ALIAS) 2318 and self._match_texts(("STRUCT", "VALUE")) 2319 and self._prev.text.upper() 2320 ) 2321 2322 if distinct: 2323 distinct = self.expression( 2324 exp.Distinct, 2325 on=self._parse_value() if self._match(TokenType.ON) else None, 2326 ) 2327 2328 if all_ and distinct: 2329 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2330 2331 limit = self._parse_limit(top=True) 2332 projections = self._parse_projections() 2333 2334 
this = self.expression( 2335 exp.Select, 2336 kind=kind, 2337 hint=hint, 2338 distinct=distinct, 2339 expressions=projections, 2340 limit=limit, 2341 ) 2342 this.comments = comments 2343 2344 into = self._parse_into() 2345 if into: 2346 this.set("into", into) 2347 2348 if not from_: 2349 from_ = self._parse_from() 2350 2351 if from_: 2352 this.set("from", from_) 2353 2354 this = self._parse_query_modifiers(this) 2355 elif (table or nested) and self._match(TokenType.L_PAREN): 2356 if self._match(TokenType.PIVOT): 2357 this = self._parse_simplified_pivot() 2358 elif self._match(TokenType.FROM): 2359 this = exp.select("*").from_( 2360 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2361 ) 2362 else: 2363 this = ( 2364 self._parse_table() 2365 if table 2366 else self._parse_select(nested=True, parse_set_operation=False) 2367 ) 2368 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2369 2370 self._match_r_paren() 2371 2372 # We return early here so that the UNION isn't attached to the subquery by the 2373 # following call to _parse_set_operations, but instead becomes the parent node 2374 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2375 elif self._match(TokenType.VALUES, advance=False): 2376 this = self._parse_derived_table_values() 2377 elif from_: 2378 this = exp.select("*").from_(from_.this, copy=False) 2379 else: 2380 this = None 2381 2382 if parse_set_operation: 2383 return self._parse_set_operations(this) 2384 return this 2385 2386 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2387 if not skip_with_token and not self._match(TokenType.WITH): 2388 return None 2389 2390 comments = self._prev_comments 2391 recursive = self._match(TokenType.RECURSIVE) 2392 2393 expressions = [] 2394 while True: 2395 expressions.append(self._parse_cte()) 2396 2397 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2398 break 2399 else: 2400 self._match(TokenType.WITH) 2401 2402 
return self.expression( 2403 exp.With, comments=comments, expressions=expressions, recursive=recursive 2404 ) 2405 2406 def _parse_cte(self) -> exp.CTE: 2407 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2408 if not alias or not alias.this: 2409 self.raise_error("Expected CTE to have alias") 2410 2411 self._match(TokenType.ALIAS) 2412 return self.expression( 2413 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2414 ) 2415 2416 def _parse_table_alias( 2417 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2418 ) -> t.Optional[exp.TableAlias]: 2419 any_token = self._match(TokenType.ALIAS) 2420 alias = ( 2421 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2422 or self._parse_string_as_identifier() 2423 ) 2424 2425 index = self._index 2426 if self._match(TokenType.L_PAREN): 2427 columns = self._parse_csv(self._parse_function_parameter) 2428 self._match_r_paren() if columns else self._retreat(index) 2429 else: 2430 columns = None 2431 2432 if not alias and not columns: 2433 return None 2434 2435 return self.expression(exp.TableAlias, this=alias, columns=columns) 2436 2437 def _parse_subquery( 2438 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2439 ) -> t.Optional[exp.Subquery]: 2440 if not this: 2441 return None 2442 2443 return self.expression( 2444 exp.Subquery, 2445 this=this, 2446 pivots=self._parse_pivots(), 2447 alias=self._parse_table_alias() if parse_alias else None, 2448 ) 2449 2450 def _parse_query_modifiers( 2451 self, this: t.Optional[exp.Expression] 2452 ) -> t.Optional[exp.Expression]: 2453 if isinstance(this, self.MODIFIABLES): 2454 for join in iter(self._parse_join, None): 2455 this.append("joins", join) 2456 for lateral in iter(self._parse_lateral, None): 2457 this.append("laterals", lateral) 2458 2459 while True: 2460 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2461 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2462 
key, expression = parser(self) 2463 2464 if expression: 2465 this.set(key, expression) 2466 if key == "limit": 2467 offset = expression.args.pop("offset", None) 2468 2469 if offset: 2470 offset = exp.Offset(expression=offset) 2471 this.set("offset", offset) 2472 2473 limit_by_expressions = expression.expressions 2474 expression.set("expressions", None) 2475 offset.set("expressions", limit_by_expressions) 2476 continue 2477 break 2478 return this 2479 2480 def _parse_hint(self) -> t.Optional[exp.Hint]: 2481 if self._match(TokenType.HINT): 2482 hints = [] 2483 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2484 hints.extend(hint) 2485 2486 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2487 self.raise_error("Expected */ after HINT") 2488 2489 return self.expression(exp.Hint, expressions=hints) 2490 2491 return None 2492 2493 def _parse_into(self) -> t.Optional[exp.Into]: 2494 if not self._match(TokenType.INTO): 2495 return None 2496 2497 temp = self._match(TokenType.TEMPORARY) 2498 unlogged = self._match_text_seq("UNLOGGED") 2499 self._match(TokenType.TABLE) 2500 2501 return self.expression( 2502 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2503 ) 2504 2505 def _parse_from( 2506 self, joins: bool = False, skip_from_token: bool = False 2507 ) -> t.Optional[exp.From]: 2508 if not skip_from_token and not self._match(TokenType.FROM): 2509 return None 2510 2511 return self.expression( 2512 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2513 ) 2514 2515 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2516 if not self._match(TokenType.MATCH_RECOGNIZE): 2517 return None 2518 2519 self._match_l_paren() 2520 2521 partition = self._parse_partition_by() 2522 order = self._parse_order() 2523 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2524 2525 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2526 rows = exp.var("ONE 
ROW PER MATCH") 2527 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2528 text = "ALL ROWS PER MATCH" 2529 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2530 text += " SHOW EMPTY MATCHES" 2531 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2532 text += " OMIT EMPTY MATCHES" 2533 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2534 text += " WITH UNMATCHED ROWS" 2535 rows = exp.var(text) 2536 else: 2537 rows = None 2538 2539 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2540 text = "AFTER MATCH SKIP" 2541 if self._match_text_seq("PAST", "LAST", "ROW"): 2542 text += " PAST LAST ROW" 2543 elif self._match_text_seq("TO", "NEXT", "ROW"): 2544 text += " TO NEXT ROW" 2545 elif self._match_text_seq("TO", "FIRST"): 2546 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2547 elif self._match_text_seq("TO", "LAST"): 2548 text += f" TO LAST {self._advance_any().text}" # type: ignore 2549 after = exp.var(text) 2550 else: 2551 after = None 2552 2553 if self._match_text_seq("PATTERN"): 2554 self._match_l_paren() 2555 2556 if not self._curr: 2557 self.raise_error("Expecting )", self._curr) 2558 2559 paren = 1 2560 start = self._curr 2561 2562 while self._curr and paren > 0: 2563 if self._curr.token_type == TokenType.L_PAREN: 2564 paren += 1 2565 if self._curr.token_type == TokenType.R_PAREN: 2566 paren -= 1 2567 2568 end = self._prev 2569 self._advance() 2570 2571 if paren > 0: 2572 self.raise_error("Expecting )", self._curr) 2573 2574 pattern = exp.var(self._find_sql(start, end)) 2575 else: 2576 pattern = None 2577 2578 define = ( 2579 self._parse_csv(self._parse_name_as_expression) 2580 if self._match_text_seq("DEFINE") 2581 else None 2582 ) 2583 2584 self._match_r_paren() 2585 2586 return self.expression( 2587 exp.MatchRecognize, 2588 partition_by=partition, 2589 order=order, 2590 measures=measures, 2591 rows=rows, 2592 after=after, 2593 pattern=pattern, 2594 define=define, 2595 alias=self._parse_table_alias(), 2596 ) 2597 2598 
def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2599 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2600 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2601 cross_apply = False 2602 2603 if cross_apply is not None: 2604 this = self._parse_select(table=True) 2605 view = None 2606 outer = None 2607 elif self._match(TokenType.LATERAL): 2608 this = self._parse_select(table=True) 2609 view = self._match(TokenType.VIEW) 2610 outer = self._match(TokenType.OUTER) 2611 else: 2612 return None 2613 2614 if not this: 2615 this = ( 2616 self._parse_unnest() 2617 or self._parse_function() 2618 or self._parse_id_var(any_token=False) 2619 ) 2620 2621 while self._match(TokenType.DOT): 2622 this = exp.Dot( 2623 this=this, 2624 expression=self._parse_function() or self._parse_id_var(any_token=False), 2625 ) 2626 2627 if view: 2628 table = self._parse_id_var(any_token=False) 2629 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2630 table_alias: t.Optional[exp.TableAlias] = self.expression( 2631 exp.TableAlias, this=table, columns=columns 2632 ) 2633 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2634 # We move the alias from the lateral's child node to the lateral itself 2635 table_alias = this.args["alias"].pop() 2636 else: 2637 table_alias = self._parse_table_alias() 2638 2639 return self.expression( 2640 exp.Lateral, 2641 this=this, 2642 view=view, 2643 outer=outer, 2644 alias=table_alias, 2645 cross_apply=cross_apply, 2646 ) 2647 2648 def _parse_join_parts( 2649 self, 2650 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2651 return ( 2652 self._match_set(self.JOIN_METHODS) and self._prev, 2653 self._match_set(self.JOIN_SIDES) and self._prev, 2654 self._match_set(self.JOIN_KINDS) and self._prev, 2655 ) 2656 2657 def _parse_join( 2658 self, skip_join_token: bool = False, parse_bracket: bool = False 2659 ) -> t.Optional[exp.Join]: 2660 if 
self._match(TokenType.COMMA): 2661 return self.expression(exp.Join, this=self._parse_table()) 2662 2663 index = self._index 2664 method, side, kind = self._parse_join_parts() 2665 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2666 join = self._match(TokenType.JOIN) 2667 2668 if not skip_join_token and not join: 2669 self._retreat(index) 2670 kind = None 2671 method = None 2672 side = None 2673 2674 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2675 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2676 2677 if not skip_join_token and not join and not outer_apply and not cross_apply: 2678 return None 2679 2680 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2681 2682 if method: 2683 kwargs["method"] = method.text 2684 if side: 2685 kwargs["side"] = side.text 2686 if kind: 2687 kwargs["kind"] = kind.text 2688 if hint: 2689 kwargs["hint"] = hint 2690 2691 if self._match(TokenType.ON): 2692 kwargs["on"] = self._parse_conjunction() 2693 elif self._match(TokenType.USING): 2694 kwargs["using"] = self._parse_wrapped_id_vars() 2695 elif not (kind and kind.token_type == TokenType.CROSS): 2696 index = self._index 2697 join = self._parse_join() 2698 2699 if join and self._match(TokenType.ON): 2700 kwargs["on"] = self._parse_conjunction() 2701 elif join and self._match(TokenType.USING): 2702 kwargs["using"] = self._parse_wrapped_id_vars() 2703 else: 2704 join = None 2705 self._retreat(index) 2706 2707 kwargs["this"].set("joins", [join] if join else None) 2708 2709 comments = [c for token in (method, side, kind) if token for c in token.comments] 2710 return self.expression(exp.Join, comments=comments, **kwargs) 2711 2712 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2713 this = self._parse_conjunction() 2714 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2715 return this 2716 2717 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, 
advance=False): 2718 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2719 2720 return this 2721 2722 def _parse_index( 2723 self, 2724 index: t.Optional[exp.Expression] = None, 2725 ) -> t.Optional[exp.Index]: 2726 if index: 2727 unique = None 2728 primary = None 2729 amp = None 2730 2731 self._match(TokenType.ON) 2732 self._match(TokenType.TABLE) # hive 2733 table = self._parse_table_parts(schema=True) 2734 else: 2735 unique = self._match(TokenType.UNIQUE) 2736 primary = self._match_text_seq("PRIMARY") 2737 amp = self._match_text_seq("AMP") 2738 2739 if not self._match(TokenType.INDEX): 2740 return None 2741 2742 index = self._parse_id_var() 2743 table = None 2744 2745 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2746 2747 if self._match(TokenType.L_PAREN, advance=False): 2748 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2749 else: 2750 columns = None 2751 2752 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 2753 2754 return self.expression( 2755 exp.Index, 2756 this=index, 2757 table=table, 2758 using=using, 2759 columns=columns, 2760 unique=unique, 2761 primary=primary, 2762 amp=amp, 2763 include=include, 2764 partition_by=self._parse_partition_by(), 2765 where=self._parse_where(), 2766 ) 2767 2768 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2769 hints: t.List[exp.Expression] = [] 2770 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2771 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2772 hints.append( 2773 self.expression( 2774 exp.WithTableHint, 2775 expressions=self._parse_csv( 2776 lambda: self._parse_function() or self._parse_var(any_token=True) 2777 ), 2778 ) 2779 ) 2780 self._match_r_paren() 2781 else: 2782 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2783 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 
2784 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2785 2786 self._match_texts(("INDEX", "KEY")) 2787 if self._match(TokenType.FOR): 2788 hint.set("target", self._advance_any() and self._prev.text.upper()) 2789 2790 hint.set("expressions", self._parse_wrapped_id_vars()) 2791 hints.append(hint) 2792 2793 return hints or None 2794 2795 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2796 return ( 2797 (not schema and self._parse_function(optional_parens=False)) 2798 or self._parse_id_var(any_token=False) 2799 or self._parse_string_as_identifier() 2800 or self._parse_placeholder() 2801 ) 2802 2803 def _parse_table_parts(self, schema: bool = False, is_db_reference: bool = False) -> exp.Table: 2804 catalog = None 2805 db = None 2806 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 2807 2808 while self._match(TokenType.DOT): 2809 if catalog: 2810 # This allows nesting the table in arbitrarily many dot expressions if needed 2811 table = self.expression( 2812 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2813 ) 2814 else: 2815 catalog = db 2816 db = table 2817 table = self._parse_table_part(schema=schema) or "" 2818 2819 if is_db_reference: 2820 catalog = db 2821 db = table 2822 table = None 2823 2824 if not table and not is_db_reference: 2825 self.raise_error(f"Expected table name but got {self._curr}") 2826 if not db and is_db_reference: 2827 self.raise_error(f"Expected database name but got {self._curr}") 2828 2829 return self.expression( 2830 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2831 ) 2832 2833 def _parse_table( 2834 self, 2835 schema: bool = False, 2836 joins: bool = False, 2837 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2838 parse_bracket: bool = False, 2839 is_db_reference: bool = False, 2840 ) -> t.Optional[exp.Expression]: 2841 lateral = self._parse_lateral() 2842 if lateral: 2843 return lateral 2844 2845 unnest = 
self._parse_unnest() 2846 if unnest: 2847 return unnest 2848 2849 values = self._parse_derived_table_values() 2850 if values: 2851 return values 2852 2853 subquery = self._parse_select(table=True) 2854 if subquery: 2855 if not subquery.args.get("pivots"): 2856 subquery.set("pivots", self._parse_pivots()) 2857 return subquery 2858 2859 bracket = parse_bracket and self._parse_bracket(None) 2860 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2861 this = t.cast( 2862 exp.Expression, 2863 bracket 2864 or self._parse_bracket( 2865 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 2866 ), 2867 ) 2868 2869 if schema: 2870 return self._parse_schema(this=this) 2871 2872 version = self._parse_version() 2873 2874 if version: 2875 this.set("version", version) 2876 2877 if self.dialect.ALIAS_POST_TABLESAMPLE: 2878 table_sample = self._parse_table_sample() 2879 2880 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2881 if alias: 2882 this.set("alias", alias) 2883 2884 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 2885 return self.expression( 2886 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 2887 ) 2888 2889 this.set("hints", self._parse_table_hints()) 2890 2891 if not this.args.get("pivots"): 2892 this.set("pivots", self._parse_pivots()) 2893 2894 if not self.dialect.ALIAS_POST_TABLESAMPLE: 2895 table_sample = self._parse_table_sample() 2896 2897 if table_sample: 2898 table_sample.set("this", this) 2899 this = table_sample 2900 2901 if joins: 2902 for join in iter(self._parse_join, None): 2903 this.append("joins", join) 2904 2905 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 2906 this.set("ordinality", True) 2907 this.set("alias", self._parse_table_alias()) 2908 2909 return this 2910 2911 def _parse_version(self) -> t.Optional[exp.Version]: 2912 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2913 this = "TIMESTAMP" 2914 elif 
self._match(TokenType.VERSION_SNAPSHOT): 2915 this = "VERSION" 2916 else: 2917 return None 2918 2919 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2920 kind = self._prev.text.upper() 2921 start = self._parse_bitwise() 2922 self._match_texts(("TO", "AND")) 2923 end = self._parse_bitwise() 2924 expression: t.Optional[exp.Expression] = self.expression( 2925 exp.Tuple, expressions=[start, end] 2926 ) 2927 elif self._match_text_seq("CONTAINED", "IN"): 2928 kind = "CONTAINED IN" 2929 expression = self.expression( 2930 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2931 ) 2932 elif self._match(TokenType.ALL): 2933 kind = "ALL" 2934 expression = None 2935 else: 2936 self._match_text_seq("AS", "OF") 2937 kind = "AS OF" 2938 expression = self._parse_type() 2939 2940 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2941 2942 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2943 if not self._match(TokenType.UNNEST): 2944 return None 2945 2946 expressions = self._parse_wrapped_csv(self._parse_equality) 2947 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2948 2949 alias = self._parse_table_alias() if with_alias else None 2950 2951 if alias: 2952 if self.dialect.UNNEST_COLUMN_ONLY: 2953 if alias.args.get("columns"): 2954 self.raise_error("Unexpected extra column alias in unnest.") 2955 2956 alias.set("columns", [alias.this]) 2957 alias.set("this", None) 2958 2959 columns = alias.args.get("columns") or [] 2960 if offset and len(expressions) < len(columns): 2961 offset = columns.pop() 2962 2963 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2964 self._match(TokenType.ALIAS) 2965 offset = self._parse_id_var( 2966 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2967 ) or exp.to_identifier("offset") 2968 2969 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2970 2971 def _parse_derived_table_values(self) -> 
t.Optional[exp.Values]: 2972 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2973 if not is_derived and not self._match_text_seq("VALUES"): 2974 return None 2975 2976 expressions = self._parse_csv(self._parse_value) 2977 alias = self._parse_table_alias() 2978 2979 if is_derived: 2980 self._match_r_paren() 2981 2982 return self.expression( 2983 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2984 ) 2985 2986 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2987 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2988 as_modifier and self._match_text_seq("USING", "SAMPLE") 2989 ): 2990 return None 2991 2992 bucket_numerator = None 2993 bucket_denominator = None 2994 bucket_field = None 2995 percent = None 2996 size = None 2997 seed = None 2998 2999 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3000 matched_l_paren = self._match(TokenType.L_PAREN) 3001 3002 if self.TABLESAMPLE_CSV: 3003 num = None 3004 expressions = self._parse_csv(self._parse_primary) 3005 else: 3006 expressions = None 3007 num = ( 3008 self._parse_factor() 3009 if self._match(TokenType.NUMBER, advance=False) 3010 else self._parse_primary() or self._parse_placeholder() 3011 ) 3012 3013 if self._match_text_seq("BUCKET"): 3014 bucket_numerator = self._parse_number() 3015 self._match_text_seq("OUT", "OF") 3016 bucket_denominator = bucket_denominator = self._parse_number() 3017 self._match(TokenType.ON) 3018 bucket_field = self._parse_field() 3019 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3020 percent = num 3021 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3022 size = num 3023 else: 3024 percent = num 3025 3026 if matched_l_paren: 3027 self._match_r_paren() 3028 3029 if self._match(TokenType.L_PAREN): 3030 method = self._parse_var(upper=True) 3031 seed = self._match(TokenType.COMMA) and self._parse_number() 3032 self._match_r_paren() 3033 elif 
self._match_texts(("SEED", "REPEATABLE")): 3034 seed = self._parse_wrapped(self._parse_number) 3035 3036 return self.expression( 3037 exp.TableSample, 3038 expressions=expressions, 3039 method=method, 3040 bucket_numerator=bucket_numerator, 3041 bucket_denominator=bucket_denominator, 3042 bucket_field=bucket_field, 3043 percent=percent, 3044 size=size, 3045 seed=seed, 3046 ) 3047 3048 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3049 return list(iter(self._parse_pivot, None)) or None 3050 3051 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 3052 return list(iter(self._parse_join, None)) or None 3053 3054 # https://duckdb.org/docs/sql/statements/pivot 3055 def _parse_simplified_pivot(self) -> exp.Pivot: 3056 def _parse_on() -> t.Optional[exp.Expression]: 3057 this = self._parse_bitwise() 3058 return self._parse_in(this) if self._match(TokenType.IN) else this 3059 3060 this = self._parse_table() 3061 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3062 using = self._match(TokenType.USING) and self._parse_csv( 3063 lambda: self._parse_alias(self._parse_function()) 3064 ) 3065 group = self._parse_group() 3066 return self.expression( 3067 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3068 ) 3069 3070 def _parse_pivot_in(self) -> exp.In: 3071 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3072 this = self._parse_conjunction() 3073 3074 self._match(TokenType.ALIAS) 3075 alias = self._parse_field() 3076 if alias: 3077 return self.expression(exp.PivotAlias, this=this, alias=alias) 3078 3079 return this 3080 3081 value = self._parse_column() 3082 3083 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3084 self.raise_error("Expecting IN (") 3085 3086 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3087 3088 self._match_r_paren() 3089 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3090 3091 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 
    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the column-name stems derived from PIVOT aggregation aliases."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause; None when the WHERE token is absent (unless skipped)."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse a GROUP BY clause, including ALL, GROUPING SETS, ROLLUP, CUBE
        and WITH TOTALS variants.

        Loops so that comma-separated mixes of plain expressions and
        grouping constructs all accumulate into one exp.Group.
        """
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # WITH ROLLUP stores True; bare ROLLUP(...) stores the column list
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    # WITH matched but nothing followed it — give the token back
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse GROUPING SETS (...); None when the keyword is absent."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)
3220 grouping_set = self._parse_csv(self._parse_column) 3221 self._match_r_paren() 3222 return self.expression(exp.Tuple, expressions=grouping_set) 3223 3224 return self._parse_column() 3225 3226 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3227 if not skip_having_token and not self._match(TokenType.HAVING): 3228 return None 3229 return self.expression(exp.Having, this=self._parse_conjunction()) 3230 3231 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3232 if not self._match(TokenType.QUALIFY): 3233 return None 3234 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3235 3236 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3237 if skip_start_token: 3238 start = None 3239 elif self._match(TokenType.START_WITH): 3240 start = self._parse_conjunction() 3241 else: 3242 return None 3243 3244 self._match(TokenType.CONNECT_BY) 3245 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3246 exp.Prior, this=self._parse_bitwise() 3247 ) 3248 connect = self._parse_conjunction() 3249 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3250 3251 if not start and self._match(TokenType.START_WITH): 3252 start = self._parse_conjunction() 3253 3254 return self.expression(exp.Connect, start=start, connect=connect) 3255 3256 def _parse_name_as_expression(self) -> exp.Alias: 3257 return self.expression( 3258 exp.Alias, 3259 alias=self._parse_id_var(any_token=True), 3260 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3261 ) 3262 3263 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3264 if self._match_text_seq("INTERPOLATE"): 3265 return self._parse_wrapped_csv(self._parse_name_as_expression) 3266 return None 3267 3268 def _parse_order( 3269 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3270 ) -> t.Optional[exp.Expression]: 3271 siblings = None 3272 if not skip_order_token and not 
self._match(TokenType.ORDER_BY): 3273 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3274 return this 3275 3276 siblings = True 3277 3278 return self.expression( 3279 exp.Order, 3280 this=this, 3281 expressions=self._parse_csv(self._parse_ordered), 3282 interpolate=self._parse_interpolate(), 3283 siblings=siblings, 3284 ) 3285 3286 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3287 if not self._match(token): 3288 return None 3289 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3290 3291 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3292 this = parse_method() if parse_method else self._parse_conjunction() 3293 3294 asc = self._match(TokenType.ASC) 3295 desc = self._match(TokenType.DESC) or (asc and False) 3296 3297 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3298 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3299 3300 nulls_first = is_nulls_first or False 3301 explicitly_null_ordered = is_nulls_first or is_nulls_last 3302 3303 if ( 3304 not explicitly_null_ordered 3305 and ( 3306 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3307 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3308 ) 3309 and self.dialect.NULL_ORDERING != "nulls_are_last" 3310 ): 3311 nulls_first = True 3312 3313 if self._match_text_seq("WITH", "FILL"): 3314 with_fill = self.expression( 3315 exp.WithFill, 3316 **{ # type: ignore 3317 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3318 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3319 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3320 }, 3321 ) 3322 else: 3323 with_fill = None 3324 3325 return self.expression( 3326 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3327 ) 3328 3329 def _parse_limit( 3330 self, this: t.Optional[exp.Expression] = None, top: bool = False 3331 ) -> t.Optional[exp.Expression]: 3332 if 
    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause; returns `this` unchanged when absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse the ClickHouse-style `BY <exprs>` suffix of LIMIT/OFFSET."""
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing row-locking clauses: FOR UPDATE / FOR SHARE /
        LOCK IN SHARE MODE, each optionally with OF <tables> and a wait policy
        (NOWAIT / WAIT <n> / SKIP LOCKED). Returns an empty list when none match.
        """
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait: True = NOWAIT, False = SKIP LOCKED, expression = WAIT <n>
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Left-fold UNION / EXCEPT / INTERSECT operators onto `this`.

        Also hoists trailing query modifiers from the right-hand side onto the
        outer union when the dialect attaches modifiers to the whole union.
        """
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            # DISTINCT is the default unless ALL is spelled out
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            expression = this.expression

            if expression:
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        # Move the modifier from the inner select to the union
                        this.set(arg, expr.pop())

        return this
    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR chains (lowest-precedence boolean operators)."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality operators (=, <>, ...)."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison operators (<, >, <=, ...)."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range-style predicates: [NOT] BETWEEN/IN/LIKE/etc. (via
        RANGE_PARSERS), plus Postgres ISNULL/NOTNULL and a trailing IS test.
        """
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            # NOTNULL becomes NOT (x IS NULL)
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: [NOT] DISTINCT FROM / NULL / TRUE/FALSE.

        Retreats and returns None when no valid operand follows IS.
        """
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            # IS NOT DISTINCT FROM -> null-safe equality
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_conjunction())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of IN: UNNEST(...), a parenthesized
        (or bracketed) list/subquery, or a bare field.
        """
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                # A single subquery goes into the `query` slot, not `expressions`
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this
    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an ESCAPE expression when the keyword follows."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL expression, canonicalizing it into the
        `INTERVAL '<value>' <unit>` form where possible.

        When `match_interval` is False the INTERVAL keyword is assumed to have
        been consumed already. Retreats and returns None on non-interval input.
        """
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            # Bare `interval is ...` was actually a column named "interval"
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1].upper())

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, string concatenation (||), ?? coalescing,
        and << / >> shifts (spelled as LT LT / GT GT token pairs).
        """
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators (TERM token table)."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators, tagging divisions with the
        dialect's typed/safe division semantics.
        """
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            this = self.expression(
                self.FACTOR[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )
            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this
    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary prefix operators, falling through to typed expressions."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        """Parse an interval sum, a cast-style `<type> <literal>`, or a column."""
        interval = parse_interval and self._parse_interval()
        if interval:
            # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
            while True:
                index = self._index
                self._match(TokenType.PLUS)

                if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
                    self._retreat(index)
                    break

                interval = self.expression(  # type: ignore
                    exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
                )

            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # `<type> '<literal>'` is treated as a cast of the literal
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Plain type name followed by a non-literal: it was a column
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one parameter of a parameterized type, e.g. the 10 in DECIMAL(10)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested/parameterized) data type into exp.DataType.

        Handles user-defined types, struct/nested generics (both `(...)` and
        `<...>` forms), enum/aggregate parameter lists, timestamp time-zone
        suffixes, INTERVAL spans, UNSIGNED integers, and `[]` array suffixes.
        Retreats and returns None when the tokens don't form a type (e.g. when
        `check_func` is set and the "type" was actually a function call).
        """
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if identifier:
                # Re-tokenize the identifier: it may be a type name in disguise
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                # e.g. ClickHouse AggregateFunction(<func>, <types...>)
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # It parsed like a parameterized type, but might still be a function
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # Not followed by a string literal, so it was a function call
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] pairs wrap the type in ARRAY, innermost first
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one struct field: `<name>[:] <type>` or a bare type."""
        index = self._index
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        column_def = self._parse_column_def(this)

        if type_required and (
            (isinstance(this, exp.Column) and this.this is column_def) or this is column_def
        ):
            # Nothing but a bare name was parsed; re-parse it as a type
            self._retreat(index)
            return self._parse_types()

        return column_def

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AT TIME ZONE when the phrase follows."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference and any trailing column operators/brackets."""
        this = self._parse_column_reference()
        return self._parse_column_ops(this) if this else self._parse_bracket(this)
    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators to `this`: `::` casts, dotted member
        access (re-shuffling table/db/catalog parts), and bracket subscripts.
        """
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift name parts left: what was the column becomes the table, etc.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal (merging adjacent string
        literals into a Concat), a leading-dot number, or a parenthesized
        expression/subquery/tuple. Returns None when none applies.
        """
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # SQL-standard implicit concatenation of adjacent string literals
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None
_parse_field( 3977 self, 3978 any_token: bool = False, 3979 tokens: t.Optional[t.Collection[TokenType]] = None, 3980 anonymous_func: bool = False, 3981 ) -> t.Optional[exp.Expression]: 3982 return ( 3983 self._parse_primary() 3984 or self._parse_function(anonymous=anonymous_func) 3985 or self._parse_id_var(any_token=any_token, tokens=tokens) 3986 ) 3987 3988 def _parse_function( 3989 self, 3990 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3991 anonymous: bool = False, 3992 optional_parens: bool = True, 3993 ) -> t.Optional[exp.Expression]: 3994 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 3995 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 3996 fn_syntax = False 3997 if ( 3998 self._match(TokenType.L_BRACE, advance=False) 3999 and self._next 4000 and self._next.text.upper() == "FN" 4001 ): 4002 self._advance(2) 4003 fn_syntax = True 4004 4005 func = self._parse_function_call( 4006 functions=functions, anonymous=anonymous, optional_parens=optional_parens 4007 ) 4008 4009 if fn_syntax: 4010 self._match(TokenType.R_BRACE) 4011 4012 return func 4013 4014 def _parse_function_call( 4015 self, 4016 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4017 anonymous: bool = False, 4018 optional_parens: bool = True, 4019 ) -> t.Optional[exp.Expression]: 4020 if not self._curr: 4021 return None 4022 4023 comments = self._curr.comments 4024 token_type = self._curr.token_type 4025 this = self._curr.text 4026 upper = this.upper() 4027 4028 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4029 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4030 self._advance() 4031 return parser(self) 4032 4033 if not self._next or self._next.token_type != TokenType.L_PAREN: 4034 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4035 self._advance() 4036 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4037 4038 return None 4039 4040 if token_type not in 
self.FUNC_TOKENS: 4041 return None 4042 4043 self._advance(2) 4044 4045 parser = self.FUNCTION_PARSERS.get(upper) 4046 if parser and not anonymous: 4047 this = parser(self) 4048 else: 4049 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4050 4051 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4052 this = self.expression(subquery_predicate, this=self._parse_select()) 4053 self._match_r_paren() 4054 return this 4055 4056 if functions is None: 4057 functions = self.FUNCTIONS 4058 4059 function = functions.get(upper) 4060 4061 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4062 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4063 4064 if function and not anonymous: 4065 if "dialect" in function.__code__.co_varnames: 4066 func = function(args, dialect=self.dialect) 4067 else: 4068 func = function(args) 4069 4070 func = self.validate_expression(func, args) 4071 if not self.dialect.NORMALIZE_FUNCTIONS: 4072 func.meta["name"] = this 4073 4074 this = func 4075 else: 4076 this = self.expression(exp.Anonymous, this=this, expressions=args) 4077 4078 if isinstance(this, exp.Expression): 4079 this.add_comments(comments) 4080 4081 self._match_r_paren(this) 4082 return self._parse_window(this) 4083 4084 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4085 return self._parse_column_def(self._parse_id_var()) 4086 4087 def _parse_user_defined_function( 4088 self, kind: t.Optional[TokenType] = None 4089 ) -> t.Optional[exp.Expression]: 4090 this = self._parse_id_var() 4091 4092 while self._match(TokenType.DOT): 4093 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4094 4095 if not self._match(TokenType.L_PAREN): 4096 return this 4097 4098 expressions = self._parse_csv(self._parse_function_parameter) 4099 self._match_r_paren() 4100 return self.expression( 4101 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4102 ) 4103 4104 def 
_parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4105 literal = self._parse_primary() 4106 if literal: 4107 return self.expression(exp.Introducer, this=token.text, expression=literal) 4108 4109 return self.expression(exp.Identifier, this=token.text) 4110 4111 def _parse_session_parameter(self) -> exp.SessionParameter: 4112 kind = None 4113 this = self._parse_id_var() or self._parse_primary() 4114 4115 if this and self._match(TokenType.DOT): 4116 kind = this.name 4117 this = self._parse_var() or self._parse_primary() 4118 4119 return self.expression(exp.SessionParameter, this=this, kind=kind) 4120 4121 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4122 index = self._index 4123 4124 if self._match(TokenType.L_PAREN): 4125 expressions = t.cast( 4126 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4127 ) 4128 4129 if not self._match(TokenType.R_PAREN): 4130 self._retreat(index) 4131 else: 4132 expressions = [self._parse_id_var()] 4133 4134 if self._match_set(self.LAMBDAS): 4135 return self.LAMBDAS[self._prev.token_type](self, expressions) 4136 4137 self._retreat(index) 4138 4139 this: t.Optional[exp.Expression] 4140 4141 if self._match(TokenType.DISTINCT): 4142 this = self.expression( 4143 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4144 ) 4145 else: 4146 this = self._parse_select_or_expression(alias=alias) 4147 4148 return self._parse_limit( 4149 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4150 ) 4151 4152 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4153 index = self._index 4154 4155 if not self.errors: 4156 try: 4157 if self._parse_select(nested=True): 4158 return this 4159 except ParseError: 4160 pass 4161 finally: 4162 self.errors.clear() 4163 self._retreat(index) 4164 4165 if not self._match(TokenType.L_PAREN): 4166 return this 4167 4168 args = self._parse_csv(lambda: 
self._parse_constraint() or self._parse_field_def()) 4169 4170 self._match_r_paren() 4171 return self.expression(exp.Schema, this=this, expressions=args) 4172 4173 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4174 return self._parse_column_def(self._parse_field(any_token=True)) 4175 4176 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4177 # column defs are not really columns, they're identifiers 4178 if isinstance(this, exp.Column): 4179 this = this.this 4180 4181 kind = self._parse_types(schema=True) 4182 4183 if self._match_text_seq("FOR", "ORDINALITY"): 4184 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4185 4186 constraints: t.List[exp.Expression] = [] 4187 4188 if not kind and self._match(TokenType.ALIAS): 4189 constraints.append( 4190 self.expression( 4191 exp.ComputedColumnConstraint, 4192 this=self._parse_conjunction(), 4193 persisted=self._match_text_seq("PERSISTED"), 4194 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4195 ) 4196 ) 4197 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4198 self._match(TokenType.ALIAS) 4199 constraints.append( 4200 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4201 ) 4202 4203 while True: 4204 constraint = self._parse_column_constraint() 4205 if not constraint: 4206 break 4207 constraints.append(constraint) 4208 4209 if not kind and not constraints: 4210 return this 4211 4212 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4213 4214 def _parse_auto_increment( 4215 self, 4216 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4217 start = None 4218 increment = None 4219 4220 if self._match(TokenType.L_PAREN, advance=False): 4221 args = self._parse_wrapped_csv(self._parse_bitwise) 4222 start = seq_get(args, 0) 4223 increment = seq_get(args, 1) 4224 elif self._match_text_seq("START"): 4225 start = 
self._parse_bitwise() 4226 self._match_text_seq("INCREMENT") 4227 increment = self._parse_bitwise() 4228 4229 if start and increment: 4230 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4231 4232 return exp.AutoIncrementColumnConstraint() 4233 4234 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4235 if not self._match_text_seq("REFRESH"): 4236 self._retreat(self._index - 1) 4237 return None 4238 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4239 4240 def _parse_compress(self) -> exp.CompressColumnConstraint: 4241 if self._match(TokenType.L_PAREN, advance=False): 4242 return self.expression( 4243 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4244 ) 4245 4246 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4247 4248 def _parse_generated_as_identity( 4249 self, 4250 ) -> ( 4251 exp.GeneratedAsIdentityColumnConstraint 4252 | exp.ComputedColumnConstraint 4253 | exp.GeneratedAsRowColumnConstraint 4254 ): 4255 if self._match_text_seq("BY", "DEFAULT"): 4256 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4257 this = self.expression( 4258 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4259 ) 4260 else: 4261 self._match_text_seq("ALWAYS") 4262 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4263 4264 self._match(TokenType.ALIAS) 4265 4266 if self._match_text_seq("ROW"): 4267 start = self._match_text_seq("START") 4268 if not start: 4269 self._match(TokenType.END) 4270 hidden = self._match_text_seq("HIDDEN") 4271 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4272 4273 identity = self._match_text_seq("IDENTITY") 4274 4275 if self._match(TokenType.L_PAREN): 4276 if self._match(TokenType.START_WITH): 4277 this.set("start", self._parse_bitwise()) 4278 if self._match_text_seq("INCREMENT", "BY"): 4279 this.set("increment", 
self._parse_bitwise()) 4280 if self._match_text_seq("MINVALUE"): 4281 this.set("minvalue", self._parse_bitwise()) 4282 if self._match_text_seq("MAXVALUE"): 4283 this.set("maxvalue", self._parse_bitwise()) 4284 4285 if self._match_text_seq("CYCLE"): 4286 this.set("cycle", True) 4287 elif self._match_text_seq("NO", "CYCLE"): 4288 this.set("cycle", False) 4289 4290 if not identity: 4291 this.set("expression", self._parse_bitwise()) 4292 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4293 args = self._parse_csv(self._parse_bitwise) 4294 this.set("start", seq_get(args, 0)) 4295 this.set("increment", seq_get(args, 1)) 4296 4297 self._match_r_paren() 4298 4299 return this 4300 4301 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4302 self._match_text_seq("LENGTH") 4303 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4304 4305 def _parse_not_constraint( 4306 self, 4307 ) -> t.Optional[exp.Expression]: 4308 if self._match_text_seq("NULL"): 4309 return self.expression(exp.NotNullColumnConstraint) 4310 if self._match_text_seq("CASESPECIFIC"): 4311 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4312 if self._match_text_seq("FOR", "REPLICATION"): 4313 return self.expression(exp.NotForReplicationColumnConstraint) 4314 return None 4315 4316 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4317 if self._match(TokenType.CONSTRAINT): 4318 this = self._parse_id_var() 4319 else: 4320 this = None 4321 4322 if self._match_texts(self.CONSTRAINT_PARSERS): 4323 return self.expression( 4324 exp.ColumnConstraint, 4325 this=this, 4326 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4327 ) 4328 4329 return this 4330 4331 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4332 if not self._match(TokenType.CONSTRAINT): 4333 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4334 4335 this = self._parse_id_var() 4336 
expressions = [] 4337 4338 while True: 4339 constraint = self._parse_unnamed_constraint() or self._parse_function() 4340 if not constraint: 4341 break 4342 expressions.append(constraint) 4343 4344 return self.expression(exp.Constraint, this=this, expressions=expressions) 4345 4346 def _parse_unnamed_constraint( 4347 self, constraints: t.Optional[t.Collection[str]] = None 4348 ) -> t.Optional[exp.Expression]: 4349 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4350 constraints or self.CONSTRAINT_PARSERS 4351 ): 4352 return None 4353 4354 constraint = self._prev.text.upper() 4355 if constraint not in self.CONSTRAINT_PARSERS: 4356 self.raise_error(f"No parser found for schema constraint {constraint}.") 4357 4358 return self.CONSTRAINT_PARSERS[constraint](self) 4359 4360 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4361 self._match_text_seq("KEY") 4362 return self.expression( 4363 exp.UniqueColumnConstraint, 4364 this=self._parse_schema(self._parse_id_var(any_token=False)), 4365 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4366 ) 4367 4368 def _parse_key_constraint_options(self) -> t.List[str]: 4369 options = [] 4370 while True: 4371 if not self._curr: 4372 break 4373 4374 if self._match(TokenType.ON): 4375 action = None 4376 on = self._advance_any() and self._prev.text 4377 4378 if self._match_text_seq("NO", "ACTION"): 4379 action = "NO ACTION" 4380 elif self._match_text_seq("CASCADE"): 4381 action = "CASCADE" 4382 elif self._match_text_seq("RESTRICT"): 4383 action = "RESTRICT" 4384 elif self._match_pair(TokenType.SET, TokenType.NULL): 4385 action = "SET NULL" 4386 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4387 action = "SET DEFAULT" 4388 else: 4389 self.raise_error("Invalid key constraint") 4390 4391 options.append(f"ON {on} {action}") 4392 elif self._match_text_seq("NOT", "ENFORCED"): 4393 options.append("NOT ENFORCED") 4394 elif self._match_text_seq("DEFERRABLE"): 4395 
options.append("DEFERRABLE") 4396 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4397 options.append("INITIALLY DEFERRED") 4398 elif self._match_text_seq("NORELY"): 4399 options.append("NORELY") 4400 elif self._match_text_seq("MATCH", "FULL"): 4401 options.append("MATCH FULL") 4402 else: 4403 break 4404 4405 return options 4406 4407 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4408 if match and not self._match(TokenType.REFERENCES): 4409 return None 4410 4411 expressions = None 4412 this = self._parse_table(schema=True) 4413 options = self._parse_key_constraint_options() 4414 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4415 4416 def _parse_foreign_key(self) -> exp.ForeignKey: 4417 expressions = self._parse_wrapped_id_vars() 4418 reference = self._parse_references() 4419 options = {} 4420 4421 while self._match(TokenType.ON): 4422 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4423 self.raise_error("Expected DELETE or UPDATE") 4424 4425 kind = self._prev.text.lower() 4426 4427 if self._match_text_seq("NO", "ACTION"): 4428 action = "NO ACTION" 4429 elif self._match(TokenType.SET): 4430 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4431 action = "SET " + self._prev.text.upper() 4432 else: 4433 self._advance() 4434 action = self._prev.text.upper() 4435 4436 options[kind] = action 4437 4438 return self.expression( 4439 exp.ForeignKey, 4440 expressions=expressions, 4441 reference=reference, 4442 **options, # type: ignore 4443 ) 4444 4445 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4446 return self._parse_field() 4447 4448 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4449 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4450 self._retreat(self._index - 1) 4451 return None 4452 4453 id_vars = self._parse_wrapped_id_vars() 4454 return self.expression( 4455 exp.PeriodForSystemTimeConstraint, 4456 
this=seq_get(id_vars, 0), 4457 expression=seq_get(id_vars, 1), 4458 ) 4459 4460 def _parse_primary_key( 4461 self, wrapped_optional: bool = False, in_props: bool = False 4462 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4463 desc = ( 4464 self._match_set((TokenType.ASC, TokenType.DESC)) 4465 and self._prev.token_type == TokenType.DESC 4466 ) 4467 4468 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4469 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4470 4471 expressions = self._parse_wrapped_csv( 4472 self._parse_primary_key_part, optional=wrapped_optional 4473 ) 4474 options = self._parse_key_constraint_options() 4475 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4476 4477 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4478 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4479 4480 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4481 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4482 return this 4483 4484 bracket_kind = self._prev.token_type 4485 expressions = self._parse_csv( 4486 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4487 ) 4488 4489 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4490 self.raise_error("Expected ]") 4491 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4492 self.raise_error("Expected }") 4493 4494 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4495 if bracket_kind == TokenType.L_BRACE: 4496 this = self.expression(exp.Struct, expressions=expressions) 4497 elif not this or this.name.upper() == "ARRAY": 4498 this = self.expression(exp.Array, expressions=expressions) 4499 else: 4500 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4501 this = self.expression(exp.Bracket, 
this=this, expressions=expressions) 4502 4503 self._add_comments(this) 4504 return self._parse_bracket(this) 4505 4506 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4507 if self._match(TokenType.COLON): 4508 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4509 return this 4510 4511 def _parse_case(self) -> t.Optional[exp.Expression]: 4512 ifs = [] 4513 default = None 4514 4515 comments = self._prev_comments 4516 expression = self._parse_conjunction() 4517 4518 while self._match(TokenType.WHEN): 4519 this = self._parse_conjunction() 4520 self._match(TokenType.THEN) 4521 then = self._parse_conjunction() 4522 ifs.append(self.expression(exp.If, this=this, true=then)) 4523 4524 if self._match(TokenType.ELSE): 4525 default = self._parse_conjunction() 4526 4527 if not self._match(TokenType.END): 4528 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4529 default = exp.column("interval") 4530 else: 4531 self.raise_error("Expected END after CASE", self._prev) 4532 4533 return self._parse_window( 4534 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4535 ) 4536 4537 def _parse_if(self) -> t.Optional[exp.Expression]: 4538 if self._match(TokenType.L_PAREN): 4539 args = self._parse_csv(self._parse_conjunction) 4540 this = self.validate_expression(exp.If.from_arg_list(args), args) 4541 self._match_r_paren() 4542 else: 4543 index = self._index - 1 4544 4545 if self.NO_PAREN_IF_COMMANDS and index == 0: 4546 return self._parse_as_command(self._prev) 4547 4548 condition = self._parse_conjunction() 4549 4550 if not condition: 4551 self._retreat(index) 4552 return None 4553 4554 self._match(TokenType.THEN) 4555 true = self._parse_conjunction() 4556 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4557 self._match(TokenType.END) 4558 this = self.expression(exp.If, this=condition, true=true, false=false) 4559 4560 
return self._parse_window(this) 4561 4562 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4563 if not self._match_text_seq("VALUE", "FOR"): 4564 self._retreat(self._index - 1) 4565 return None 4566 4567 return self.expression( 4568 exp.NextValueFor, 4569 this=self._parse_column(), 4570 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4571 ) 4572 4573 def _parse_extract(self) -> exp.Extract: 4574 this = self._parse_function() or self._parse_var() or self._parse_type() 4575 4576 if self._match(TokenType.FROM): 4577 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4578 4579 if not self._match(TokenType.COMMA): 4580 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4581 4582 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4583 4584 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4585 this = self._parse_conjunction() 4586 4587 if not self._match(TokenType.ALIAS): 4588 if self._match(TokenType.COMMA): 4589 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4590 4591 self.raise_error("Expected AS after CAST") 4592 4593 fmt = None 4594 to = self._parse_types() 4595 4596 if self._match(TokenType.FORMAT): 4597 fmt_string = self._parse_string() 4598 fmt = self._parse_at_time_zone(fmt_string) 4599 4600 if not to: 4601 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4602 if to.this in exp.DataType.TEMPORAL_TYPES: 4603 this = self.expression( 4604 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4605 this=this, 4606 format=exp.Literal.string( 4607 format_time( 4608 fmt_string.this if fmt_string else "", 4609 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 4610 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 4611 ) 4612 ), 4613 ) 4614 4615 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4616 this.set("zone", fmt.args["zone"]) 
4617 return this 4618 elif not to: 4619 self.raise_error("Expected TYPE after CAST") 4620 elif isinstance(to, exp.Identifier): 4621 to = exp.DataType.build(to.name, udt=True) 4622 elif to.this == exp.DataType.Type.CHAR: 4623 if self._match(TokenType.CHARACTER_SET): 4624 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4625 4626 return self.expression( 4627 exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe 4628 ) 4629 4630 def _parse_string_agg(self) -> exp.Expression: 4631 if self._match(TokenType.DISTINCT): 4632 args: t.List[t.Optional[exp.Expression]] = [ 4633 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4634 ] 4635 if self._match(TokenType.COMMA): 4636 args.extend(self._parse_csv(self._parse_conjunction)) 4637 else: 4638 args = self._parse_csv(self._parse_conjunction) # type: ignore 4639 4640 index = self._index 4641 if not self._match(TokenType.R_PAREN) and args: 4642 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4643 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4644 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4645 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4646 4647 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4648 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4649 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
4650 if not self._match_text_seq("WITHIN", "GROUP"): 4651 self._retreat(index) 4652 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4653 4654 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4655 order = self._parse_order(this=seq_get(args, 0)) 4656 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4657 4658 def _parse_convert( 4659 self, strict: bool, safe: t.Optional[bool] = None 4660 ) -> t.Optional[exp.Expression]: 4661 this = self._parse_bitwise() 4662 4663 if self._match(TokenType.USING): 4664 to: t.Optional[exp.Expression] = self.expression( 4665 exp.CharacterSet, this=self._parse_var() 4666 ) 4667 elif self._match(TokenType.COMMA): 4668 to = self._parse_types() 4669 else: 4670 to = None 4671 4672 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4673 4674 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4675 """ 4676 There are generally two variants of the DECODE function: 4677 4678 - DECODE(bin, charset) 4679 - DECODE(expression, search, result [, search, result] ... [, default]) 4680 4681 The second variant will always be parsed into a CASE expression. Note that NULL 4682 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4683 instead of relying on pattern matching. 
4684 """ 4685 args = self._parse_csv(self._parse_conjunction) 4686 4687 if len(args) < 3: 4688 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4689 4690 expression, *expressions = args 4691 if not expression: 4692 return None 4693 4694 ifs = [] 4695 for search, result in zip(expressions[::2], expressions[1::2]): 4696 if not search or not result: 4697 return None 4698 4699 if isinstance(search, exp.Literal): 4700 ifs.append( 4701 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4702 ) 4703 elif isinstance(search, exp.Null): 4704 ifs.append( 4705 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4706 ) 4707 else: 4708 cond = exp.or_( 4709 exp.EQ(this=expression.copy(), expression=search), 4710 exp.and_( 4711 exp.Is(this=expression.copy(), expression=exp.Null()), 4712 exp.Is(this=search.copy(), expression=exp.Null()), 4713 copy=False, 4714 ), 4715 copy=False, 4716 ) 4717 ifs.append(exp.If(this=cond, true=result)) 4718 4719 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4720 4721 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4722 self._match_text_seq("KEY") 4723 key = self._parse_column() 4724 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 4725 self._match_text_seq("VALUE") 4726 value = self._parse_bitwise() 4727 4728 if not key and not value: 4729 return None 4730 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4731 4732 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4733 if not this or not self._match_text_seq("FORMAT", "JSON"): 4734 return this 4735 4736 return self.expression(exp.FormatJson, this=this) 4737 4738 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4739 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 4740 for value in values: 4741 if self._match_text_seq(value, "ON", on): 4742 return f"{value} ON {on}" 4743 4744 return None 4745 4746 @t.overload 4747 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: 4748 ... 4749 4750 @t.overload 4751 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: 4752 ... 4753 4754 def _parse_json_object(self, agg=False): 4755 star = self._parse_star() 4756 expressions = ( 4757 [star] 4758 if star 4759 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4760 ) 4761 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4762 4763 unique_keys = None 4764 if self._match_text_seq("WITH", "UNIQUE"): 4765 unique_keys = True 4766 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4767 unique_keys = False 4768 4769 self._match_text_seq("KEYS") 4770 4771 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4772 self._parse_type() 4773 ) 4774 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4775 4776 return self.expression( 4777 exp.JSONObjectAgg if agg else exp.JSONObject, 4778 expressions=expressions, 4779 null_handling=null_handling, 4780 unique_keys=unique_keys, 4781 return_type=return_type, 4782 encoding=encoding, 4783 ) 4784 4785 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4786 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4787 if not self._match_text_seq("NESTED"): 4788 this = self._parse_id_var() 4789 kind = self._parse_types(allow_identifiers=False) 4790 nested = None 4791 else: 4792 this = None 4793 kind = None 4794 nested = True 4795 4796 path = self._match_text_seq("PATH") and self._parse_string() 4797 nested_schema = nested and self._parse_json_schema() 4798 4799 return self.expression( 4800 exp.JSONColumnDef, 4801 this=this, 4802 kind=kind, 4803 path=path, 4804 nested_schema=nested_schema, 4805 ) 4806 4807 def _parse_json_schema(self) -> 
exp.JSONSchema: 4808 self._match_text_seq("COLUMNS") 4809 return self.expression( 4810 exp.JSONSchema, 4811 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 4812 ) 4813 4814 def _parse_json_table(self) -> exp.JSONTable: 4815 this = self._parse_format_json(self._parse_bitwise()) 4816 path = self._match(TokenType.COMMA) and self._parse_string() 4817 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 4818 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 4819 schema = self._parse_json_schema() 4820 4821 return exp.JSONTable( 4822 this=this, 4823 schema=schema, 4824 path=path, 4825 error_handling=error_handling, 4826 empty_handling=empty_handling, 4827 ) 4828 4829 def _parse_match_against(self) -> exp.MatchAgainst: 4830 expressions = self._parse_csv(self._parse_column) 4831 4832 self._match_text_seq(")", "AGAINST", "(") 4833 4834 this = self._parse_string() 4835 4836 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4837 modifier = "IN NATURAL LANGUAGE MODE" 4838 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4839 modifier = f"{modifier} WITH QUERY EXPANSION" 4840 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4841 modifier = "IN BOOLEAN MODE" 4842 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4843 modifier = "WITH QUERY EXPANSION" 4844 else: 4845 modifier = None 4846 4847 return self.expression( 4848 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4849 ) 4850 4851 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4852 def _parse_open_json(self) -> exp.OpenJSON: 4853 this = self._parse_bitwise() 4854 path = self._match(TokenType.COMMA) and self._parse_string() 4855 4856 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4857 this = self._parse_field(any_token=True) 4858 kind = self._parse_types() 4859 path = self._parse_string() 4860 as_json = self._match_pair(TokenType.ALIAS, 
TokenType.JSON) 4861 4862 return self.expression( 4863 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4864 ) 4865 4866 expressions = None 4867 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4868 self._match_l_paren() 4869 expressions = self._parse_csv(_parse_open_json_column_def) 4870 4871 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4872 4873 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4874 args = self._parse_csv(self._parse_bitwise) 4875 4876 if self._match(TokenType.IN): 4877 return self.expression( 4878 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4879 ) 4880 4881 if haystack_first: 4882 haystack = seq_get(args, 0) 4883 needle = seq_get(args, 1) 4884 else: 4885 needle = seq_get(args, 0) 4886 haystack = seq_get(args, 1) 4887 4888 return self.expression( 4889 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4890 ) 4891 4892 def _parse_predict(self) -> exp.Predict: 4893 self._match_text_seq("MODEL") 4894 this = self._parse_table() 4895 4896 self._match(TokenType.COMMA) 4897 self._match_text_seq("TABLE") 4898 4899 return self.expression( 4900 exp.Predict, 4901 this=this, 4902 expression=self._parse_table(), 4903 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 4904 ) 4905 4906 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4907 args = self._parse_csv(self._parse_table) 4908 return exp.JoinHint(this=func_name.upper(), expressions=args) 4909 4910 def _parse_substring(self) -> exp.Substring: 4911 # Postgres supports the form: substring(string [from int] [for int]) 4912 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4913 4914 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4915 4916 if self._match(TokenType.FROM): 4917 args.append(self._parse_bitwise()) 4918 if self._match(TokenType.FOR): 4919 
args.append(self._parse_bitwise()) 4920 4921 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4922 4923 def _parse_trim(self) -> exp.Trim: 4924 # https://www.w3resource.com/sql/character-functions/trim.php 4925 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4926 4927 position = None 4928 collation = None 4929 expression = None 4930 4931 if self._match_texts(self.TRIM_TYPES): 4932 position = self._prev.text.upper() 4933 4934 this = self._parse_bitwise() 4935 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4936 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 4937 expression = self._parse_bitwise() 4938 4939 if invert_order: 4940 this, expression = expression, this 4941 4942 if self._match(TokenType.COLLATE): 4943 collation = self._parse_bitwise() 4944 4945 return self.expression( 4946 exp.Trim, this=this, position=position, expression=expression, collation=collation 4947 ) 4948 4949 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4950 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4951 4952 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4953 return self._parse_window(self._parse_id_var(), alias=True) 4954 4955 def _parse_respect_or_ignore_nulls( 4956 self, this: t.Optional[exp.Expression] 4957 ) -> t.Optional[exp.Expression]: 4958 if self._match_text_seq("IGNORE", "NULLS"): 4959 return self.expression(exp.IgnoreNulls, this=this) 4960 if self._match_text_seq("RESPECT", "NULLS"): 4961 return self.expression(exp.RespectNulls, this=this) 4962 return this 4963 4964 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4965 if self._match(TokenType.HAVING): 4966 self._match_texts(("MAX", "MIN")) 4967 max = self._prev.text.upper() != "MIN" 4968 return self.expression( 4969 exp.HavingMax, this=this, expression=self._parse_column(), max=max 4970 ) 4971 4972 return this 4973 4974 def 
_parse_window( 4975 self, this: t.Optional[exp.Expression], alias: bool = False 4976 ) -> t.Optional[exp.Expression]: 4977 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4978 self._match(TokenType.WHERE) 4979 this = self.expression( 4980 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4981 ) 4982 self._match_r_paren() 4983 4984 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4985 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4986 if self._match_text_seq("WITHIN", "GROUP"): 4987 order = self._parse_wrapped(self._parse_order) 4988 this = self.expression(exp.WithinGroup, this=this, expression=order) 4989 4990 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4991 # Some dialects choose to implement and some do not. 4992 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4993 4994 # There is some code above in _parse_lambda that handles 4995 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4996 4997 # The below changes handle 4998 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4999 5000 # Oracle allows both formats 5001 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5002 # and Snowflake chose to do the same for familiarity 5003 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5004 if isinstance(this, exp.AggFunc): 5005 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5006 5007 if ignore_respect and ignore_respect is not this: 5008 ignore_respect.replace(ignore_respect.this) 5009 this = self.expression(ignore_respect.__class__, this=this) 5010 5011 this = self._parse_respect_or_ignore_nulls(this) 5012 5013 # bigquery select from window x AS (partition by ...) 
5014 if alias: 5015 over = None 5016 self._match(TokenType.ALIAS) 5017 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5018 return this 5019 else: 5020 over = self._prev.text.upper() 5021 5022 if not self._match(TokenType.L_PAREN): 5023 return self.expression( 5024 exp.Window, this=this, alias=self._parse_id_var(False), over=over 5025 ) 5026 5027 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5028 5029 first = self._match(TokenType.FIRST) 5030 if self._match_text_seq("LAST"): 5031 first = False 5032 5033 partition, order = self._parse_partition_and_order() 5034 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5035 5036 if kind: 5037 self._match(TokenType.BETWEEN) 5038 start = self._parse_window_spec() 5039 self._match(TokenType.AND) 5040 end = self._parse_window_spec() 5041 5042 spec = self.expression( 5043 exp.WindowSpec, 5044 kind=kind, 5045 start=start["value"], 5046 start_side=start["side"], 5047 end=end["value"], 5048 end_side=end["side"], 5049 ) 5050 else: 5051 spec = None 5052 5053 self._match_r_paren() 5054 5055 window = self.expression( 5056 exp.Window, 5057 this=this, 5058 partition_by=partition, 5059 order=order, 5060 spec=spec, 5061 alias=window_alias, 5062 over=over, 5063 first=first, 5064 ) 5065 5066 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
5067 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5068 return self._parse_window(window, alias=alias) 5069 5070 return window 5071 5072 def _parse_partition_and_order( 5073 self, 5074 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5075 return self._parse_partition_by(), self._parse_order() 5076 5077 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5078 self._match(TokenType.BETWEEN) 5079 5080 return { 5081 "value": ( 5082 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5083 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5084 or self._parse_bitwise() 5085 ), 5086 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5087 } 5088 5089 def _parse_alias( 5090 self, this: t.Optional[exp.Expression], explicit: bool = False 5091 ) -> t.Optional[exp.Expression]: 5092 any_token = self._match(TokenType.ALIAS) 5093 comments = self._prev_comments 5094 5095 if explicit and not any_token: 5096 return this 5097 5098 if self._match(TokenType.L_PAREN): 5099 aliases = self.expression( 5100 exp.Aliases, 5101 comments=comments, 5102 this=this, 5103 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5104 ) 5105 self._match_r_paren(aliases) 5106 return aliases 5107 5108 alias = self._parse_id_var(any_token) or ( 5109 self.STRING_ALIASES and self._parse_string_as_identifier() 5110 ) 5111 5112 if alias: 5113 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5114 column = this.this 5115 5116 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5117 if not this.comments and column and column.comments: 5118 this.comments = column.comments 5119 column.comments = None 5120 5121 return this 5122 5123 def _parse_id_var( 5124 self, 5125 any_token: bool = True, 5126 tokens: t.Optional[t.Collection[TokenType]] = None, 5127 ) -> t.Optional[exp.Expression]: 5128 identifier = self._parse_identifier() 5129 5130 if identifier: 5131 return 
identifier 5132 5133 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 5134 quoted = self._prev.token_type == TokenType.STRING 5135 return exp.Identifier(this=self._prev.text, quoted=quoted) 5136 5137 return None 5138 5139 def _parse_string(self) -> t.Optional[exp.Expression]: 5140 if self._match_set((TokenType.STRING, TokenType.RAW_STRING)): 5141 return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev) 5142 return self._parse_placeholder() 5143 5144 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5145 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5146 5147 def _parse_number(self) -> t.Optional[exp.Expression]: 5148 if self._match(TokenType.NUMBER): 5149 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 5150 return self._parse_placeholder() 5151 5152 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5153 if self._match(TokenType.IDENTIFIER): 5154 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5155 return self._parse_placeholder() 5156 5157 def _parse_var( 5158 self, 5159 any_token: bool = False, 5160 tokens: t.Optional[t.Collection[TokenType]] = None, 5161 upper: bool = False, 5162 ) -> t.Optional[exp.Expression]: 5163 if ( 5164 (any_token and self._advance_any()) 5165 or self._match(TokenType.VAR) 5166 or (self._match_set(tokens) if tokens else False) 5167 ): 5168 return self.expression( 5169 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5170 ) 5171 return self._parse_placeholder() 5172 5173 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5174 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5175 self._advance() 5176 return self._prev 5177 return None 5178 5179 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5180 return self._parse_var() or self._parse_string() 5181 5182 def _parse_null(self) -> 
t.Optional[exp.Expression]: 5183 if self._match_set(self.NULL_TOKENS): 5184 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5185 return self._parse_placeholder() 5186 5187 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5188 if self._match(TokenType.TRUE): 5189 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5190 if self._match(TokenType.FALSE): 5191 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5192 return self._parse_placeholder() 5193 5194 def _parse_star(self) -> t.Optional[exp.Expression]: 5195 if self._match(TokenType.STAR): 5196 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5197 return self._parse_placeholder() 5198 5199 def _parse_parameter(self) -> exp.Parameter: 5200 def _parse_parameter_part() -> t.Optional[exp.Expression]: 5201 return ( 5202 self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True) 5203 ) 5204 5205 self._match(TokenType.L_BRACE) 5206 this = _parse_parameter_part() 5207 expression = self._match(TokenType.COLON) and _parse_parameter_part() 5208 self._match(TokenType.R_BRACE) 5209 5210 return self.expression(exp.Parameter, this=this, expression=expression) 5211 5212 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5213 if self._match_set(self.PLACEHOLDER_PARSERS): 5214 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5215 if placeholder: 5216 return placeholder 5217 self._advance(-1) 5218 return None 5219 5220 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5221 if not self._match(TokenType.EXCEPT): 5222 return None 5223 if self._match(TokenType.L_PAREN, advance=False): 5224 return self._parse_wrapped_csv(self._parse_column) 5225 5226 except_column = self._parse_column() 5227 return [except_column] if except_column else None 5228 5229 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5230 if not self._match(TokenType.REPLACE): 5231 return None 5232 if self._match(TokenType.L_PAREN, 
advance=False): 5233 return self._parse_wrapped_csv(self._parse_expression) 5234 5235 replace_expression = self._parse_expression() 5236 return [replace_expression] if replace_expression else None 5237 5238 def _parse_csv( 5239 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5240 ) -> t.List[exp.Expression]: 5241 parse_result = parse_method() 5242 items = [parse_result] if parse_result is not None else [] 5243 5244 while self._match(sep): 5245 self._add_comments(parse_result) 5246 parse_result = parse_method() 5247 if parse_result is not None: 5248 items.append(parse_result) 5249 5250 return items 5251 5252 def _parse_tokens( 5253 self, parse_method: t.Callable, expressions: t.Dict 5254 ) -> t.Optional[exp.Expression]: 5255 this = parse_method() 5256 5257 while self._match_set(expressions): 5258 this = self.expression( 5259 expressions[self._prev.token_type], 5260 this=this, 5261 comments=self._prev_comments, 5262 expression=parse_method(), 5263 ) 5264 5265 return this 5266 5267 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5268 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5269 5270 def _parse_wrapped_csv( 5271 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5272 ) -> t.List[exp.Expression]: 5273 return self._parse_wrapped( 5274 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5275 ) 5276 5277 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5278 wrapped = self._match(TokenType.L_PAREN) 5279 if not wrapped and not optional: 5280 self.raise_error("Expecting (") 5281 parse_result = parse_method() 5282 if wrapped: 5283 self._match_r_paren() 5284 return parse_result 5285 5286 def _parse_expressions(self) -> t.List[exp.Expression]: 5287 return self._parse_csv(self._parse_expression) 5288 5289 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5290 return 
self._parse_select() or self._parse_set_operations( 5291 self._parse_expression() if alias else self._parse_conjunction() 5292 ) 5293 5294 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5295 return self._parse_query_modifiers( 5296 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5297 ) 5298 5299 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5300 this = None 5301 if self._match_texts(self.TRANSACTION_KIND): 5302 this = self._prev.text 5303 5304 self._match_texts(("TRANSACTION", "WORK")) 5305 5306 modes = [] 5307 while True: 5308 mode = [] 5309 while self._match(TokenType.VAR): 5310 mode.append(self._prev.text) 5311 5312 if mode: 5313 modes.append(" ".join(mode)) 5314 if not self._match(TokenType.COMMA): 5315 break 5316 5317 return self.expression(exp.Transaction, this=this, modes=modes) 5318 5319 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5320 chain = None 5321 savepoint = None 5322 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5323 5324 self._match_texts(("TRANSACTION", "WORK")) 5325 5326 if self._match_text_seq("TO"): 5327 self._match_text_seq("SAVEPOINT") 5328 savepoint = self._parse_id_var() 5329 5330 if self._match(TokenType.AND): 5331 chain = not self._match_text_seq("NO") 5332 self._match_text_seq("CHAIN") 5333 5334 if is_rollback: 5335 return self.expression(exp.Rollback, savepoint=savepoint) 5336 5337 return self.expression(exp.Commit, chain=chain) 5338 5339 def _parse_refresh(self) -> exp.Refresh: 5340 self._match(TokenType.TABLE) 5341 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5342 5343 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5344 if not self._match_text_seq("ADD"): 5345 return None 5346 5347 self._match(TokenType.COLUMN) 5348 exists_column = self._parse_exists(not_=True) 5349 expression = self._parse_field_def() 5350 5351 if expression: 5352 expression.set("exists", exists_column) 5353 5354 
# https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5355 if self._match_texts(("FIRST", "AFTER")): 5356 position = self._prev.text 5357 column_position = self.expression( 5358 exp.ColumnPosition, this=self._parse_column(), position=position 5359 ) 5360 expression.set("position", column_position) 5361 5362 return expression 5363 5364 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5365 drop = self._match(TokenType.DROP) and self._parse_drop() 5366 if drop and not isinstance(drop, exp.Command): 5367 drop.set("kind", drop.args.get("kind", "COLUMN")) 5368 return drop 5369 5370 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5371 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5372 return self.expression( 5373 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5374 ) 5375 5376 def _parse_add_constraint(self) -> exp.AddConstraint: 5377 this = None 5378 kind = self._prev.token_type 5379 5380 if kind == TokenType.CONSTRAINT: 5381 this = self._parse_id_var() 5382 5383 if self._match_text_seq("CHECK"): 5384 expression = self._parse_wrapped(self._parse_conjunction) 5385 enforced = self._match_text_seq("ENFORCED") or False 5386 5387 return self.expression( 5388 exp.AddConstraint, this=this, expression=expression, enforced=enforced 5389 ) 5390 5391 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 5392 expression = self._parse_foreign_key() 5393 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 5394 expression = self._parse_primary_key() 5395 else: 5396 expression = None 5397 5398 return self.expression(exp.AddConstraint, this=this, expression=expression) 5399 5400 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5401 index = self._index - 1 5402 5403 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 5404 return self._parse_csv(self._parse_add_constraint) 5405 
5406 self._retreat(index) 5407 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5408 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5409 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5410 5411 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5412 self._match(TokenType.COLUMN) 5413 column = self._parse_field(any_token=True) 5414 5415 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5416 return self.expression(exp.AlterColumn, this=column, drop=True) 5417 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5418 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5419 if self._match(TokenType.COMMENT): 5420 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5421 5422 self._match_text_seq("SET", "DATA") 5423 return self.expression( 5424 exp.AlterColumn, 5425 this=column, 5426 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5427 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5428 using=self._match(TokenType.USING) and self._parse_conjunction(), 5429 ) 5430 5431 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5432 index = self._index - 1 5433 5434 partition_exists = self._parse_exists() 5435 if self._match(TokenType.PARTITION, advance=False): 5436 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5437 5438 self._retreat(index) 5439 return self._parse_csv(self._parse_drop_column) 5440 5441 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5442 if self._match(TokenType.COLUMN): 5443 exists = self._parse_exists() 5444 old_column = self._parse_column() 5445 to = self._match_text_seq("TO") 5446 new_column = self._parse_column() 5447 5448 if old_column is None or to is None or new_column is None: 5449 return None 5450 5451 return self.expression(exp.RenameColumn, this=old_column, to=new_column, 
exists=exists) 5452 5453 self._match_text_seq("TO") 5454 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5455 5456 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5457 start = self._prev 5458 5459 if not self._match(TokenType.TABLE): 5460 return self._parse_as_command(start) 5461 5462 exists = self._parse_exists() 5463 only = self._match_text_seq("ONLY") 5464 this = self._parse_table(schema=True) 5465 5466 if self._next: 5467 self._advance() 5468 5469 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5470 if parser: 5471 actions = ensure_list(parser(self)) 5472 5473 if not self._curr and actions: 5474 return self.expression( 5475 exp.AlterTable, 5476 this=this, 5477 exists=exists, 5478 actions=actions, 5479 only=only, 5480 ) 5481 5482 return self._parse_as_command(start) 5483 5484 def _parse_merge(self) -> exp.Merge: 5485 self._match(TokenType.INTO) 5486 target = self._parse_table() 5487 5488 if target and self._match(TokenType.ALIAS, advance=False): 5489 target.set("alias", self._parse_table_alias()) 5490 5491 self._match(TokenType.USING) 5492 using = self._parse_table() 5493 5494 self._match(TokenType.ON) 5495 on = self._parse_conjunction() 5496 5497 return self.expression( 5498 exp.Merge, 5499 this=target, 5500 using=using, 5501 on=on, 5502 expressions=self._parse_when_matched(), 5503 ) 5504 5505 def _parse_when_matched(self) -> t.List[exp.When]: 5506 whens = [] 5507 5508 while self._match(TokenType.WHEN): 5509 matched = not self._match(TokenType.NOT) 5510 self._match_text_seq("MATCHED") 5511 source = ( 5512 False 5513 if self._match_text_seq("BY", "TARGET") 5514 else self._match_text_seq("BY", "SOURCE") 5515 ) 5516 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5517 5518 self._match(TokenType.THEN) 5519 5520 if self._match(TokenType.INSERT): 5521 _this = self._parse_star() 5522 if _this: 5523 then: t.Optional[exp.Expression] = self.expression(exp.Insert, 
this=_this) 5524 else: 5525 then = self.expression( 5526 exp.Insert, 5527 this=self._parse_value(), 5528 expression=self._match_text_seq("VALUES") and self._parse_value(), 5529 ) 5530 elif self._match(TokenType.UPDATE): 5531 expressions = self._parse_star() 5532 if expressions: 5533 then = self.expression(exp.Update, expressions=expressions) 5534 else: 5535 then = self.expression( 5536 exp.Update, 5537 expressions=self._match(TokenType.SET) 5538 and self._parse_csv(self._parse_equality), 5539 ) 5540 elif self._match(TokenType.DELETE): 5541 then = self.expression(exp.Var, this=self._prev.text) 5542 else: 5543 then = None 5544 5545 whens.append( 5546 self.expression( 5547 exp.When, 5548 matched=matched, 5549 source=source, 5550 condition=condition, 5551 then=then, 5552 ) 5553 ) 5554 return whens 5555 5556 def _parse_show(self) -> t.Optional[exp.Expression]: 5557 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5558 if parser: 5559 return parser(self) 5560 return self._parse_as_command(self._prev) 5561 5562 def _parse_set_item_assignment( 5563 self, kind: t.Optional[str] = None 5564 ) -> t.Optional[exp.Expression]: 5565 index = self._index 5566 5567 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5568 return self._parse_set_transaction(global_=kind == "GLOBAL") 5569 5570 left = self._parse_primary() or self._parse_id_var() 5571 assignment_delimiter = self._match_texts(("=", "TO")) 5572 5573 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5574 self._retreat(index) 5575 return None 5576 5577 right = self._parse_statement() or self._parse_id_var() 5578 this = self.expression(exp.EQ, this=left, expression=right) 5579 5580 return self.expression(exp.SetItem, this=this, kind=kind) 5581 5582 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5583 self._match_text_seq("TRANSACTION") 5584 characteristics = self._parse_csv( 5585 lambda: 
self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5586 ) 5587 return self.expression( 5588 exp.SetItem, 5589 expressions=characteristics, 5590 kind="TRANSACTION", 5591 **{"global": global_}, # type: ignore 5592 ) 5593 5594 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5595 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5596 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5597 5598 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5599 index = self._index 5600 set_ = self.expression( 5601 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5602 ) 5603 5604 if self._curr: 5605 self._retreat(index) 5606 return self._parse_as_command(self._prev) 5607 5608 return set_ 5609 5610 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5611 for option in options: 5612 if self._match_text_seq(*option.split(" ")): 5613 return exp.var(option) 5614 return None 5615 5616 def _parse_as_command(self, start: Token) -> exp.Command: 5617 while self._curr: 5618 self._advance() 5619 text = self._find_sql(start, self._prev) 5620 size = len(start.text) 5621 self._warn_unsupported() 5622 return exp.Command(this=text[:size], expression=text[size:]) 5623 5624 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5625 settings = [] 5626 5627 self._match_l_paren() 5628 kind = self._parse_id_var() 5629 5630 if self._match(TokenType.L_PAREN): 5631 while True: 5632 key = self._parse_id_var() 5633 value = self._parse_primary() 5634 5635 if not key and value is None: 5636 break 5637 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5638 self._match(TokenType.R_PAREN) 5639 5640 self._match_r_paren() 5641 5642 return self.expression( 5643 exp.DictProperty, 5644 this=this, 5645 kind=kind.this if kind else None, 5646 settings=settings, 5647 ) 5648 5649 def _parse_dict_range(self, this: str) -> 
exp.DictRange: 5650 self._match_l_paren() 5651 has_min = self._match_text_seq("MIN") 5652 if has_min: 5653 min = self._parse_var() or self._parse_primary() 5654 self._match_text_seq("MAX") 5655 max = self._parse_var() or self._parse_primary() 5656 else: 5657 max = self._parse_var() or self._parse_primary() 5658 min = exp.Literal.number(0) 5659 self._match_r_paren() 5660 return self.expression(exp.DictRange, this=this, min=min, max=max) 5661 5662 def _parse_comprehension( 5663 self, this: t.Optional[exp.Expression] 5664 ) -> t.Optional[exp.Comprehension]: 5665 index = self._index 5666 expression = self._parse_column() 5667 if not self._match(TokenType.IN): 5668 self._retreat(index - 1) 5669 return None 5670 iterator = self._parse_column() 5671 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5672 return self.expression( 5673 exp.Comprehension, 5674 this=this, 5675 expression=expression, 5676 iterator=iterator, 5677 condition=condition, 5678 ) 5679 5680 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 5681 if self._match(TokenType.HEREDOC_STRING): 5682 return self.expression(exp.Heredoc, this=self._prev.text) 5683 5684 if not self._match_text_seq("$"): 5685 return None 5686 5687 tags = ["$"] 5688 tag_text = None 5689 5690 if self._is_connected(): 5691 self._advance() 5692 tags.append(self._prev.text.upper()) 5693 else: 5694 self.raise_error("No closing $ found") 5695 5696 if tags[-1] != "$": 5697 if self._is_connected() and self._match_text_seq("$"): 5698 tag_text = tags[-1] 5699 tags.append("$") 5700 else: 5701 self.raise_error("No closing $ found") 5702 5703 heredoc_start = self._curr 5704 5705 while self._curr: 5706 if self._match_text_seq(*tags, advance=False): 5707 this = self._find_sql(heredoc_start, self._prev) 5708 self._advance(len(tags)) 5709 return self.expression(exp.Heredoc, this=this, tag=tag_text) 5710 5711 self._advance() 5712 5713 self.raise_error(f"No closing {''.join(tags)} found") 5714 return None 5715 5716 def 
_find_parser( 5717 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5718 ) -> t.Optional[t.Callable]: 5719 if not self._curr: 5720 return None 5721 5722 index = self._index 5723 this = [] 5724 while True: 5725 # The current token might be multiple words 5726 curr = self._curr.text.upper() 5727 key = curr.split(" ") 5728 this.append(curr) 5729 5730 self._advance() 5731 result, trie = in_trie(trie, key) 5732 if result == TrieResult.FAILED: 5733 break 5734 5735 if result == TrieResult.EXISTS: 5736 subparser = parsers[" ".join(this)] 5737 return subparser 5738 5739 self._retreat(index) 5740 return None 5741 5742 def _match(self, token_type, advance=True, expression=None): 5743 if not self._curr: 5744 return None 5745 5746 if self._curr.token_type == token_type: 5747 if advance: 5748 self._advance() 5749 self._add_comments(expression) 5750 return True 5751 5752 return None 5753 5754 def _match_set(self, types, advance=True): 5755 if not self._curr: 5756 return None 5757 5758 if self._curr.token_type in types: 5759 if advance: 5760 self._advance() 5761 return True 5762 5763 return None 5764 5765 def _match_pair(self, token_type_a, token_type_b, advance=True): 5766 if not self._curr or not self._next: 5767 return None 5768 5769 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5770 if advance: 5771 self._advance(2) 5772 return True 5773 5774 return None 5775 5776 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5777 if not self._match(TokenType.L_PAREN, expression=expression): 5778 self.raise_error("Expecting (") 5779 5780 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5781 if not self._match(TokenType.R_PAREN, expression=expression): 5782 self.raise_error("Expecting )") 5783 5784 def _match_texts(self, texts, advance=True): 5785 if self._curr and self._curr.text.upper() in texts: 5786 if advance: 5787 self._advance() 5788 return True 5789 return None 5790 5791 def 
_match_text_seq(self, *texts, advance=True): 5792 index = self._index 5793 for text in texts: 5794 if self._curr and self._curr.text.upper() == text: 5795 self._advance() 5796 else: 5797 self._retreat(index) 5798 return None 5799 5800 if not advance: 5801 self._retreat(index) 5802 5803 return True 5804 5805 @t.overload 5806 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5807 ... 5808 5809 @t.overload 5810 def _replace_columns_with_dots( 5811 self, this: t.Optional[exp.Expression] 5812 ) -> t.Optional[exp.Expression]: 5813 ... 5814 5815 def _replace_columns_with_dots(self, this): 5816 if isinstance(this, exp.Dot): 5817 exp.replace_children(this, self._replace_columns_with_dots) 5818 elif isinstance(this, exp.Column): 5819 exp.replace_children(this, self._replace_columns_with_dots) 5820 table = this.args.get("table") 5821 this = ( 5822 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5823 ) 5824 5825 return this 5826 5827 def _replace_lambda( 5828 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5829 ) -> t.Optional[exp.Expression]: 5830 if not node: 5831 return node 5832 5833 for column in node.find_all(exp.Column): 5834 if column.parts[0].name in lambda_variables: 5835 dot_or_id = column.to_dot() if column.table else column.this 5836 parent = column.parent 5837 5838 while isinstance(parent, exp.Dot): 5839 if not isinstance(parent.parent, exp.Dot): 5840 parent.replace(dot_or_id) 5841 break 5842 parent = parent.parent 5843 else: 5844 if column is node: 5845 node = dot_or_id 5846 else: 5847 column.replace(dot_or_id) 5848 return node
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a map expression from a flat argument list.

    A single star argument (e.g. ``MAP(*)``) yields a ``StarMap``; otherwise the
    arguments are consumed pairwise as alternating keys and values and packed
    into a ``VarMap`` of two parallel arrays.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys: t.List[exp.Expression] = []
    values: t.List[exp.Expression] = []

    index = 0
    while index < len(args):
        keys.append(args[index])
        # A malformed odd-length argument list raises IndexError here, matching
        # a direct pairwise read of the list.
        values.append(args[index + 1])
        index += 2

    return exp.VarMap(
        keys=exp.array(*keys, copy=False),
        values=exp.array(*values, copy=False),
    )
def parse_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a logarithm expression from LOG's argument list.

    With two arguments, the default order is (base, expression); dialects that
    put the base last are handled by swapping. With one argument, the result is
    ``Ln`` or ``Log`` depending on the dialect's LOG_DEFAULTS_TO_LN setting.
    """
    base = seq_get(args, 0)
    value = seq_get(args, 1)

    if value:
        if not dialect.LOG_BASE_FIRST:
            # This dialect spells it LOG(expression, base) — swap into
            # the canonical (base, expression) order.
            base, value = value, base
        return exp.Log(this=base, expression=value)

    # Single-argument form: some dialects define LOG(x) as the natural log.
    constructor = exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log
    return constructor(this=base)
def parse_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return a function-argument parser that builds an ``expr_type`` node.

    The produced parser takes (json, path, *rest): the path argument is
    converted with the dialect's ``to_json_path`` hook, and for
    ``exp.JSONExtract`` any extra arguments are attached as ``expressions``.
    """

    def _parser(args: t.List, dialect: Dialect) -> E:
        json_value = seq_get(args, 0)
        json_path = dialect.to_json_path(seq_get(args, 1))
        node = expr_type(this=json_value, expression=json_path)

        # Only JSON_EXTRACT supports trailing path arguments.
        if expr_type is exp.JSONExtract and len(args) > 2:
            node.set("expressions", args[2:])

        return node

    return _parser
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    # Maps function names to callables that build the corresponding Expression
    # from a parsed argument list. Seeded with every known expression's
    # `from_arg_list`, then overridden for functions needing special handling.
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # GLOB's pattern comes first in SQL, so the two args are swapped here
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "JSON_EXTRACT": parse_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": parse_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": parse_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": parse_like,
        "LOG": parse_logarithm,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # Cast to TEXT, then keep only the date prefix (YYYY-MM-DD = 10 chars)
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    # Functions that may appear without parentheses (e.g. CURRENT_DATE)
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    # Tokens for struct-like composite types
    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    # Tokens for types that can wrap other types (e.g. ARRAY<INT>)
    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    # All tokens that can start a data type
    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    # Maps a signed integer/decimal type token to its unsigned counterpart
    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    # Predicates that can wrap a subquery (e.g. ANY (SELECT ...))
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    # Object kinds that live in a database/schema namespace
    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.MODEL,
        TokenType.DICTIONARY,
    }

    # Everything that can follow CREATE/DROP
    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.FOREIGN_KEY,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    # Identifier tokens allowed inside INTERVAL expressions (END would be ambiguous)
    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    # Identifier tokens usable as table aliases; join-related keywords are
    # excluded so they are not swallowed as an alias
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    # Tokens that can be followed by '(' and parsed as a function call
    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # The following maps define the binary-operator precedence tiers, each
    # mapping an operator token to the Expression it builds.
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.COLON_EQ: exp.PropertyEQ,
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    # Empty by default; dialects with an exponent operator populate this
    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    # Empty by default; dialects with join hints populate this
    JOIN_HINTS: t.Set[str] = set()

    # Lambda syntaxes: `x -> expr` builds a Lambda, `x => expr` a Kwarg
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Operators that can appear after a column expression; DOT is handled
    # specially by the column parser, hence the None value
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Maps an Expression type to the parser used by `parse_into` for it
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Maps the token that starts a statement to the parser for that statement
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    # Prefix (unary) operator parsers
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    # Parsers for primary (literal-like) expressions, keyed by token type
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    # Parsers for placeholder syntaxes (?, @param, :name / :1)
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    # Parsers for range/predicate operators that follow an expression
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    # Parsers for DDL properties, keyed by the property keyword
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Parsers for column/table constraints, keyed by the constraint keyword
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON UPDATE <fn> vs a bare ON <id> property
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    # Parsers for the action keyword following ALTER TABLE <name>
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"}

    # Functions whose arguments are not parenthesized (e.g. CASE ... END)
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Functions that need bespoke parsing beyond a plain argument list
    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    # Maps a clause token to a (modifier key, parsed clause) pair used when
    # collecting query modifiers (WHERE, GROUP BY, LIMIT, ...)
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Empty by default; dialects with SHOW statements populate this
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    # Expression types that accept query modifiers (LIMIT, ORDER BY, ...)
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    # Conflict actions supported by INSERT OR <action>
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}
    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    # Whether '::' casts use Cast (strict) instead of TryCast
    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    # Whether single-argument LOG() is parsed as the natural logarithm
    LOG_DEFAULTS_TO_LN = False

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether or not the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether or not string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Parses no parenthesis if statements as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        # Imported here to avoid a circular import at module load time
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        """Clears all per-parse state so the instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Record which target type failed, then try the next one
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        # Every candidate type failed - surface all collected errors
        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Runs `parse_method` over each semicolon-separated statement's tokens."""
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        # Split the token stream into one chunk per statement; semicolons
        # delimit chunks and are dropped (a trailing semicolon adds no chunk)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Any tokens left over means the statement wasn't fully consumed
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The offending span is underlined via ANSI escape codes
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
1198 """ 1199 instance = exp_class(**kwargs) 1200 instance.add_comments(comments) if comments else self._add_comments(instance) 1201 return self.validate_expression(instance) 1202 1203 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1204 if expression and self._prev_comments: 1205 expression.add_comments(self._prev_comments) 1206 self._prev_comments = None 1207 1208 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1209 """ 1210 Validates an Expression, making sure that all its mandatory arguments are set. 1211 1212 Args: 1213 expression: The expression to validate. 1214 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1215 1216 Returns: 1217 The validated expression. 1218 """ 1219 if self.error_level != ErrorLevel.IGNORE: 1220 for error_message in expression.error_messages(args): 1221 self.raise_error(error_message) 1222 1223 return expression 1224 1225 def _find_sql(self, start: Token, end: Token) -> str: 1226 return self.sql[start.start : end.end + 1] 1227 1228 def _is_connected(self) -> bool: 1229 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1230 1231 def _advance(self, times: int = 1) -> None: 1232 self._index += times 1233 self._curr = seq_get(self._tokens, self._index) 1234 self._next = seq_get(self._tokens, self._index + 1) 1235 1236 if self._index > 0: 1237 self._prev = self._tokens[self._index - 1] 1238 self._prev_comments = self._prev.comments 1239 else: 1240 self._prev = None 1241 self._prev_comments = None 1242 1243 def _retreat(self, index: int) -> None: 1244 if index != self._index: 1245 self._advance(index - self._index) 1246 1247 def _warn_unsupported(self) -> None: 1248 if len(self._tokens) <= 1: 1249 return 1250 1251 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1252 # interested in emitting a warning for the one being currently processed. 
1253 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1254 1255 logger.warning( 1256 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1257 ) 1258 1259 def _parse_command(self) -> exp.Command: 1260 self._warn_unsupported() 1261 return self.expression( 1262 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1263 ) 1264 1265 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1266 start = self._prev 1267 exists = self._parse_exists() if allow_exists else None 1268 1269 self._match(TokenType.ON) 1270 1271 kind = self._match_set(self.CREATABLES) and self._prev 1272 if not kind: 1273 return self._parse_as_command(start) 1274 1275 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1276 this = self._parse_user_defined_function(kind=kind.token_type) 1277 elif kind.token_type == TokenType.TABLE: 1278 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1279 elif kind.token_type == TokenType.COLUMN: 1280 this = self._parse_column() 1281 else: 1282 this = self._parse_id_var() 1283 1284 self._match(TokenType.IS) 1285 1286 return self.expression( 1287 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1288 ) 1289 1290 def _parse_to_table( 1291 self, 1292 ) -> exp.ToTableProperty: 1293 table = self._parse_table_parts(schema=True) 1294 return self.expression(exp.ToTableProperty, this=table) 1295 1296 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1297 def _parse_ttl(self) -> exp.Expression: 1298 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1299 this = self._parse_bitwise() 1300 1301 if self._match_text_seq("DELETE"): 1302 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1303 if self._match_text_seq("RECOMPRESS"): 1304 return self.expression( 1305 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1306 ) 
1307 if self._match_text_seq("TO", "DISK"): 1308 return self.expression( 1309 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1310 ) 1311 if self._match_text_seq("TO", "VOLUME"): 1312 return self.expression( 1313 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1314 ) 1315 1316 return this 1317 1318 expressions = self._parse_csv(_parse_ttl_action) 1319 where = self._parse_where() 1320 group = self._parse_group() 1321 1322 aggregates = None 1323 if group and self._match(TokenType.SET): 1324 aggregates = self._parse_csv(self._parse_set_item) 1325 1326 return self.expression( 1327 exp.MergeTreeTTL, 1328 expressions=expressions, 1329 where=where, 1330 group=group, 1331 aggregates=aggregates, 1332 ) 1333 1334 def _parse_statement(self) -> t.Optional[exp.Expression]: 1335 if self._curr is None: 1336 return None 1337 1338 if self._match_set(self.STATEMENT_PARSERS): 1339 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1340 1341 if self._match_set(Tokenizer.COMMANDS): 1342 return self._parse_command() 1343 1344 expression = self._parse_expression() 1345 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1346 return self._parse_query_modifiers(expression) 1347 1348 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1349 start = self._prev 1350 temporary = self._match(TokenType.TEMPORARY) 1351 materialized = self._match_text_seq("MATERIALIZED") 1352 1353 kind = self._match_set(self.CREATABLES) and self._prev.text 1354 if not kind: 1355 return self._parse_as_command(start) 1356 1357 return self.expression( 1358 exp.Drop, 1359 comments=start.comments, 1360 exists=exists or self._parse_exists(), 1361 this=self._parse_table( 1362 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1363 ), 1364 kind=kind, 1365 temporary=temporary, 1366 materialized=materialized, 1367 cascade=self._match_text_seq("CASCADE"), 1368 
constraints=self._match_text_seq("CONSTRAINTS"), 1369 purge=self._match_text_seq("PURGE"), 1370 ) 1371 1372 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1373 return ( 1374 self._match_text_seq("IF") 1375 and (not not_ or self._match(TokenType.NOT)) 1376 and self._match(TokenType.EXISTS) 1377 ) 1378 1379 def _parse_create(self) -> exp.Create | exp.Command: 1380 # Note: this can't be None because we've matched a statement parser 1381 start = self._prev 1382 comments = self._prev_comments 1383 1384 replace = ( 1385 start.token_type == TokenType.REPLACE 1386 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1387 or self._match_pair(TokenType.OR, TokenType.ALTER) 1388 ) 1389 unique = self._match(TokenType.UNIQUE) 1390 1391 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1392 self._advance() 1393 1394 properties = None 1395 create_token = self._match_set(self.CREATABLES) and self._prev 1396 1397 if not create_token: 1398 # exp.Properties.Location.POST_CREATE 1399 properties = self._parse_properties() 1400 create_token = self._match_set(self.CREATABLES) and self._prev 1401 1402 if not properties or not create_token: 1403 return self._parse_as_command(start) 1404 1405 exists = self._parse_exists(not_=True) 1406 this = None 1407 expression: t.Optional[exp.Expression] = None 1408 indexes = None 1409 no_schema_binding = None 1410 begin = None 1411 end = None 1412 clone = None 1413 1414 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1415 nonlocal properties 1416 if properties and temp_props: 1417 properties.expressions.extend(temp_props.expressions) 1418 elif temp_props: 1419 properties = temp_props 1420 1421 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1422 this = self._parse_user_defined_function(kind=create_token.token_type) 1423 1424 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1425 extend_props(self._parse_properties()) 1426 1427 expression = 
self._match(TokenType.ALIAS) and self._parse_heredoc() 1428 1429 if not expression: 1430 if self._match(TokenType.COMMAND): 1431 expression = self._parse_as_command(self._prev) 1432 else: 1433 begin = self._match(TokenType.BEGIN) 1434 return_ = self._match_text_seq("RETURN") 1435 1436 if self._match(TokenType.STRING, advance=False): 1437 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1438 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1439 expression = self._parse_string() 1440 extend_props(self._parse_properties()) 1441 else: 1442 expression = self._parse_statement() 1443 1444 end = self._match_text_seq("END") 1445 1446 if return_: 1447 expression = self.expression(exp.Return, this=expression) 1448 elif create_token.token_type == TokenType.INDEX: 1449 this = self._parse_index(index=self._parse_id_var()) 1450 elif create_token.token_type in self.DB_CREATABLES: 1451 table_parts = self._parse_table_parts( 1452 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1453 ) 1454 1455 # exp.Properties.Location.POST_NAME 1456 self._match(TokenType.COMMA) 1457 extend_props(self._parse_properties(before=True)) 1458 1459 this = self._parse_schema(this=table_parts) 1460 1461 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1462 extend_props(self._parse_properties()) 1463 1464 self._match(TokenType.ALIAS) 1465 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1466 # exp.Properties.Location.POST_ALIAS 1467 extend_props(self._parse_properties()) 1468 1469 expression = self._parse_ddl_select() 1470 1471 if create_token.token_type == TokenType.TABLE: 1472 # exp.Properties.Location.POST_EXPRESSION 1473 extend_props(self._parse_properties()) 1474 1475 indexes = [] 1476 while True: 1477 index = self._parse_index() 1478 1479 # exp.Properties.Location.POST_INDEX 1480 extend_props(self._parse_properties()) 1481 1482 if not index: 1483 break 1484 
else: 1485 self._match(TokenType.COMMA) 1486 indexes.append(index) 1487 elif create_token.token_type == TokenType.VIEW: 1488 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1489 no_schema_binding = True 1490 1491 shallow = self._match_text_seq("SHALLOW") 1492 1493 if self._match_texts(self.CLONE_KEYWORDS): 1494 copy = self._prev.text.lower() == "copy" 1495 clone = self.expression( 1496 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1497 ) 1498 1499 if self._curr: 1500 return self._parse_as_command(start) 1501 1502 return self.expression( 1503 exp.Create, 1504 comments=comments, 1505 this=this, 1506 kind=create_token.text.upper(), 1507 replace=replace, 1508 unique=unique, 1509 expression=expression, 1510 exists=exists, 1511 properties=properties, 1512 indexes=indexes, 1513 no_schema_binding=no_schema_binding, 1514 begin=begin, 1515 end=end, 1516 clone=clone, 1517 ) 1518 1519 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1520 # only used for teradata currently 1521 self._match(TokenType.COMMA) 1522 1523 kwargs = { 1524 "no": self._match_text_seq("NO"), 1525 "dual": self._match_text_seq("DUAL"), 1526 "before": self._match_text_seq("BEFORE"), 1527 "default": self._match_text_seq("DEFAULT"), 1528 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1529 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1530 "after": self._match_text_seq("AFTER"), 1531 "minimum": self._match_texts(("MIN", "MINIMUM")), 1532 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1533 } 1534 1535 if self._match_texts(self.PROPERTY_PARSERS): 1536 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1537 try: 1538 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1539 except TypeError: 1540 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1541 1542 return None 1543 1544 def _parse_property(self) -> t.Optional[exp.Expression]: 1545 if self._match_texts(self.PROPERTY_PARSERS): 1546 return 
self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1547 1548 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1549 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1550 1551 if self._match_text_seq("COMPOUND", "SORTKEY"): 1552 return self._parse_sortkey(compound=True) 1553 1554 if self._match_text_seq("SQL", "SECURITY"): 1555 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1556 1557 index = self._index 1558 key = self._parse_column() 1559 1560 if not self._match(TokenType.EQ): 1561 self._retreat(index) 1562 return None 1563 1564 return self.expression( 1565 exp.Property, 1566 this=key.to_dot() if isinstance(key, exp.Column) else key, 1567 value=self._parse_column() or self._parse_var(any_token=True), 1568 ) 1569 1570 def _parse_stored(self) -> exp.FileFormatProperty: 1571 self._match(TokenType.ALIAS) 1572 1573 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1574 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1575 1576 return self.expression( 1577 exp.FileFormatProperty, 1578 this=( 1579 self.expression( 1580 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1581 ) 1582 if input_format or output_format 1583 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1584 ), 1585 ) 1586 1587 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1588 self._match(TokenType.EQ) 1589 self._match(TokenType.ALIAS) 1590 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1591 1592 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1593 properties = [] 1594 while True: 1595 if before: 1596 prop = self._parse_property_before() 1597 else: 1598 prop = self._parse_property() 1599 1600 if not prop: 1601 break 1602 for p in ensure_list(prop): 1603 properties.append(p) 1604 
1605 if properties: 1606 return self.expression(exp.Properties, expressions=properties) 1607 1608 return None 1609 1610 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1611 return self.expression( 1612 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1613 ) 1614 1615 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1616 if self._index >= 2: 1617 pre_volatile_token = self._tokens[self._index - 2] 1618 else: 1619 pre_volatile_token = None 1620 1621 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1622 return exp.VolatileProperty() 1623 1624 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1625 1626 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1627 self._match_pair(TokenType.EQ, TokenType.ON) 1628 1629 prop = self.expression(exp.WithSystemVersioningProperty) 1630 if self._match(TokenType.L_PAREN): 1631 self._match_text_seq("HISTORY_TABLE", "=") 1632 prop.set("this", self._parse_table_parts()) 1633 1634 if self._match(TokenType.COMMA): 1635 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1636 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1637 1638 self._match_r_paren() 1639 1640 return prop 1641 1642 def _parse_with_property( 1643 self, 1644 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1645 if self._match(TokenType.L_PAREN, advance=False): 1646 return self._parse_wrapped_csv(self._parse_property) 1647 1648 if self._match_text_seq("JOURNAL"): 1649 return self._parse_withjournaltable() 1650 1651 if self._match_text_seq("DATA"): 1652 return self._parse_withdata(no=False) 1653 elif self._match_text_seq("NO", "DATA"): 1654 return self._parse_withdata(no=True) 1655 1656 if not self._next: 1657 return None 1658 1659 return self._parse_withisolatedloading() 1660 1661 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1662 def 
_parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1663 self._match(TokenType.EQ) 1664 1665 user = self._parse_id_var() 1666 self._match(TokenType.PARAMETER) 1667 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1668 1669 if not user or not host: 1670 return None 1671 1672 return exp.DefinerProperty(this=f"{user}@{host}") 1673 1674 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1675 self._match(TokenType.TABLE) 1676 self._match(TokenType.EQ) 1677 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1678 1679 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1680 return self.expression(exp.LogProperty, no=no) 1681 1682 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1683 return self.expression(exp.JournalProperty, **kwargs) 1684 1685 def _parse_checksum(self) -> exp.ChecksumProperty: 1686 self._match(TokenType.EQ) 1687 1688 on = None 1689 if self._match(TokenType.ON): 1690 on = True 1691 elif self._match_text_seq("OFF"): 1692 on = False 1693 1694 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1695 1696 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 1697 return self.expression( 1698 exp.Cluster, 1699 expressions=( 1700 self._parse_wrapped_csv(self._parse_ordered) 1701 if wrapped 1702 else self._parse_csv(self._parse_ordered) 1703 ), 1704 ) 1705 1706 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1707 self._match_text_seq("BY") 1708 1709 self._match_l_paren() 1710 expressions = self._parse_csv(self._parse_column) 1711 self._match_r_paren() 1712 1713 if self._match_text_seq("SORTED", "BY"): 1714 self._match_l_paren() 1715 sorted_by = self._parse_csv(self._parse_ordered) 1716 self._match_r_paren() 1717 else: 1718 sorted_by = None 1719 1720 self._match(TokenType.INTO) 1721 buckets = self._parse_number() 1722 self._match_text_seq("BUCKETS") 1723 1724 return self.expression( 1725 
exp.ClusteredByProperty, 1726 expressions=expressions, 1727 sorted_by=sorted_by, 1728 buckets=buckets, 1729 ) 1730 1731 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1732 if not self._match_text_seq("GRANTS"): 1733 self._retreat(self._index - 1) 1734 return None 1735 1736 return self.expression(exp.CopyGrantsProperty) 1737 1738 def _parse_freespace(self) -> exp.FreespaceProperty: 1739 self._match(TokenType.EQ) 1740 return self.expression( 1741 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1742 ) 1743 1744 def _parse_mergeblockratio( 1745 self, no: bool = False, default: bool = False 1746 ) -> exp.MergeBlockRatioProperty: 1747 if self._match(TokenType.EQ): 1748 return self.expression( 1749 exp.MergeBlockRatioProperty, 1750 this=self._parse_number(), 1751 percent=self._match(TokenType.PERCENT), 1752 ) 1753 1754 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1755 1756 def _parse_datablocksize( 1757 self, 1758 default: t.Optional[bool] = None, 1759 minimum: t.Optional[bool] = None, 1760 maximum: t.Optional[bool] = None, 1761 ) -> exp.DataBlocksizeProperty: 1762 self._match(TokenType.EQ) 1763 size = self._parse_number() 1764 1765 units = None 1766 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1767 units = self._prev.text 1768 1769 return self.expression( 1770 exp.DataBlocksizeProperty, 1771 size=size, 1772 units=units, 1773 default=default, 1774 minimum=minimum, 1775 maximum=maximum, 1776 ) 1777 1778 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1779 self._match(TokenType.EQ) 1780 always = self._match_text_seq("ALWAYS") 1781 manual = self._match_text_seq("MANUAL") 1782 never = self._match_text_seq("NEVER") 1783 default = self._match_text_seq("DEFAULT") 1784 1785 autotemp = None 1786 if self._match_text_seq("AUTOTEMP"): 1787 autotemp = self._parse_schema() 1788 1789 return self.expression( 1790 exp.BlockCompressionProperty, 1791 always=always, 
1792 manual=manual, 1793 never=never, 1794 default=default, 1795 autotemp=autotemp, 1796 ) 1797 1798 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1799 no = self._match_text_seq("NO") 1800 concurrent = self._match_text_seq("CONCURRENT") 1801 self._match_text_seq("ISOLATED", "LOADING") 1802 for_all = self._match_text_seq("FOR", "ALL") 1803 for_insert = self._match_text_seq("FOR", "INSERT") 1804 for_none = self._match_text_seq("FOR", "NONE") 1805 return self.expression( 1806 exp.IsolatedLoadingProperty, 1807 no=no, 1808 concurrent=concurrent, 1809 for_all=for_all, 1810 for_insert=for_insert, 1811 for_none=for_none, 1812 ) 1813 1814 def _parse_locking(self) -> exp.LockingProperty: 1815 if self._match(TokenType.TABLE): 1816 kind = "TABLE" 1817 elif self._match(TokenType.VIEW): 1818 kind = "VIEW" 1819 elif self._match(TokenType.ROW): 1820 kind = "ROW" 1821 elif self._match_text_seq("DATABASE"): 1822 kind = "DATABASE" 1823 else: 1824 kind = None 1825 1826 if kind in ("DATABASE", "TABLE", "VIEW"): 1827 this = self._parse_table_parts() 1828 else: 1829 this = None 1830 1831 if self._match(TokenType.FOR): 1832 for_or_in = "FOR" 1833 elif self._match(TokenType.IN): 1834 for_or_in = "IN" 1835 else: 1836 for_or_in = None 1837 1838 if self._match_text_seq("ACCESS"): 1839 lock_type = "ACCESS" 1840 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1841 lock_type = "EXCLUSIVE" 1842 elif self._match_text_seq("SHARE"): 1843 lock_type = "SHARE" 1844 elif self._match_text_seq("READ"): 1845 lock_type = "READ" 1846 elif self._match_text_seq("WRITE"): 1847 lock_type = "WRITE" 1848 elif self._match_text_seq("CHECKSUM"): 1849 lock_type = "CHECKSUM" 1850 else: 1851 lock_type = None 1852 1853 override = self._match_text_seq("OVERRIDE") 1854 1855 return self.expression( 1856 exp.LockingProperty, 1857 this=this, 1858 kind=kind, 1859 for_or_in=for_or_in, 1860 lock_type=lock_type, 1861 override=override, 1862 ) 1863 1864 def _parse_partition_by(self) -> t.List[exp.Expression]: 
        # Tail of _parse_partition_by (its `def` line is above this chunk):
        # returns the comma-separated PARTITION BY expressions, or [] when absent.
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a partition bound spec following FOR VALUES: one of
        `IN (...)`, `FROM (...) TO (...)`, or `WITH (MODULUS n, REMAINDER m)`.

        Raises a parse error if none of the three forms is found.
        """

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE / MAXVALUE are keywords here, not identifiers.
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parse `PARTITION OF <table> { DEFAULT | FOR VALUES <bound spec> }`.

        Returns None (after retreating one token) when OF does not follow.
        """
        if not self._match_text_seq("OF"):
            # Undo the PARTITION match performed by the caller's dispatch.
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse a PARTITIONED BY property (optionally after `=`)."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse the optional `AND [NO] STATISTICS` suffix of WITH [NO] DATA."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            # No STATISTICS clause at all, as opposed to an explicit NO.
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse `CONTAINS SQL` (the CONTAINS keyword was consumed by the caller)."""
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse `MODIFIES SQL DATA` (MODIFIES was consumed by the caller)."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        """Parse a property starting with NO: `NO PRIMARY INDEX` or `NO SQL`."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse a property starting with ON: `ON COMMIT {PRESERVE | DELETE} ROWS`,
        or a generic ON property otherwise."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse `READS SQL DATA` (READS was consumed by the caller)."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse a DISTKEY property: a parenthesized identifier."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse `LIKE <table> [{INCLUDING | EXCLUDING} <option>]*`.

        Returns None if an INCLUDING/EXCLUDING keyword is not followed by an
        identifier, leaving the tokenizer wherever parsing stopped.
        """
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse a SORTKEY property: a parenthesized identifier list."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse a CHARACTER SET property value (optionally after `=`)."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse `REMOTE WITH CONNECTION <table parts>` (REMOTE consumed by caller)."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS property: `TABLE<...>`, `TABLE (...)`, or a plain type."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # Angle-bracketed struct-style column list: TABLE<col type, ...>
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        """Parse a DESCRIBE statement: optional creatable kind, EXTENDED flag,
        the target table, and trailing properties."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        extended = self._match_text_seq("EXTENDED")
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions
        )

    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement (INSERT token consumed by the caller).

        Handles OVERWRITE/IGNORE modifiers, `INSERT ... DIRECTORY`,
        `INSERT OR <alternative>`, partitions, ON CONFLICT and RETURNING.
        """
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            # Preserve comments attached to the INTO token as well.
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            # RETURNING may appear either before or after the source expression.
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse a KILL statement: optional CONNECTION/QUERY kind plus an id."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse `ON CONFLICT ...` or `ON DUPLICATE KEY ...` after an INSERT.

        Returns None when neither clause is present.
        """
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse a RETURNING clause, with an optional INTO target."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse `ROW FORMAT ...` when the ROW token was already consumed."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a ROW FORMAT clause: SERDE (with optional SERDEPROPERTIES) or
        DELIMITED with its FIELDS/ESCAPED/COLLECTION/MAP/LINES/NULL options.

        Args:
            match_row: when True, require and consume the leading ROW FORMAT pair.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse `LOAD DATA [LOCAL] INPATH ... [OVERWRITE] INTO TABLE ...`;
        any other LOAD statement falls back to a raw Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement, including USING, WHERE, RETURNING and LIMIT."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            # RETURNING may appear before or after WHERE depending on dialect.
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement: target table, SET assignments, FROM,
        WHERE, RETURNING, ORDER BY and LIMIT."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse `UNCACHE TABLE [IF EXISTS] <table>`."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse `CACHE [LAZY] TABLE <table> [OPTIONS ('k' = 'v')] [AS <select>]`."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse a PARTITION clause with parenthesized expressions, if present."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse a single VALUES row as a Tuple, parenthesized or bare."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        return self.expression(exp.Tuple, expressions=[self._parse_expression()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list (overridable by dialects)."""
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like expression.

        Handles leading CTEs, leading FROM (DuckDB), parenthesized
        selects/tables/pivots, bare VALUES, and trailing set operations.

        Args:
            nested: allow a parenthesized nested select.
            table: parse a table instead of a select inside parentheses.
            parse_subquery_alias: attach an alias to a parenthesized subquery.
            parse_set_operation: fold trailing UNION/INTERSECT/EXCEPT into the result.
        """
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH clause and its comma-separated CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a repeated WITH between CTEs.
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse a single CTE: `<alias> [AS] (<statement>)`."""
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse a table alias with an optional parenthesized column list.

        Returns None when neither an alias nor columns are found.
        """
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # If nothing parsed inside the parens, they weren't a column list:
            # rewind so they can be consumed by the caller.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in a Subquery with trailing pivots and optional alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals and other query modifiers (WHERE, GROUP BY,
        LIMIT, ...) to a modifiable expression; non-modifiables pass through."""
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # A LIMIT node may carry an embedded OFFSET; hoist it
                            # to a proper Offset node, moving any LIMIT BY
                            # expressions along with it.
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint block, requiring the closing `*/`."""
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse `INTO [TEMPORARY | UNLOGGED] [TABLE] <table>`."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause.

        Args:
            joins: also parse trailing joins into the table expression.
            skip_from_token: assume the FROM token was already consumed.
        """
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE clause: PARTITION BY, ORDER BY, MEASURES,
        rows-per-match mode, AFTER MATCH SKIP, PATTERN, DEFINE and alias."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern grammar is opaque to us: capture the raw token span
            # between balanced parentheses and store it verbatim.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL, CROSS APPLY or OUTER APPLY.

        `cross_apply` is True for CROSS APPLY, False for OUTER APPLY, and
        None for plain LATERAL (where VIEW/OUTER flags apply instead).
        """
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Consume and return the (method, side, kind) tokens of a join prefix."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse a JOIN (including comma joins and CROSS/OUTER APPLY).

        Returns None and restores the token position when no join follows.
        """
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # The method/side/kind tokens weren't a join prefix after all.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # Support nested joins whose ON/USING belongs to the outer join.
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an expression with an optional trailing operator class name."""
        this = self._parse_conjunction()
        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition.

        Args:
            index: a pre-parsed index name; when given, only the ON-table part
                and column list are parsed here.
        """
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            include=include,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse table hints: T-SQL `WITH (...)` or MySQL index hints."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False, is_db_reference: bool = False) -> exp.Table:
        """Parse a possibly qualified table name (catalog.db.table, with
        arbitrarily deep dotting folded into Dot nodes).

        Args:
            schema: parse components as plain identifiers (no function calls).
            is_db_reference: the reference names a database, not a table.
        """
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema) or ""

        if is_db_reference:
            # Shift parts left: what parsed as the table is actually the db.
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, unnest, VALUES, subquery, or a
        (possibly bracketed) table reference with version/alias/hints/pivots/
        sample/joins attached.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this = t.cast(
            exp.Expression,
            bracket
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Dialects differ on whether TABLESAMPLE precedes or follows the alias.
        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse a temporal (versioned-table) clause, or return None.

        Recognizes TIMESTAMP/VERSION snapshot keywords followed by one of:
        FROM/BETWEEN ranges, CONTAINED IN (...), ALL, or AS OF (the fallback).
        """
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse an UNNEST(...) table expression, or return None.

        Handles WITH ORDINALITY / WITH OFFSET suffixes; for dialects with
        UNNEST_COLUMN_ONLY the table alias is reinterpreted as a column alias.
        """
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            # With ordinality, the last column alias names the ordinal column.
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or
exp.to_identifier("offset") 2969 2970 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2971 2972 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2973 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2974 if not is_derived and not self._match_text_seq("VALUES"): 2975 return None 2976 2977 expressions = self._parse_csv(self._parse_value) 2978 alias = self._parse_table_alias() 2979 2980 if is_derived: 2981 self._match_r_paren() 2982 2983 return self.expression( 2984 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2985 ) 2986 2987 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2988 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2989 as_modifier and self._match_text_seq("USING", "SAMPLE") 2990 ): 2991 return None 2992 2993 bucket_numerator = None 2994 bucket_denominator = None 2995 bucket_field = None 2996 percent = None 2997 size = None 2998 seed = None 2999 3000 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3001 matched_l_paren = self._match(TokenType.L_PAREN) 3002 3003 if self.TABLESAMPLE_CSV: 3004 num = None 3005 expressions = self._parse_csv(self._parse_primary) 3006 else: 3007 expressions = None 3008 num = ( 3009 self._parse_factor() 3010 if self._match(TokenType.NUMBER, advance=False) 3011 else self._parse_primary() or self._parse_placeholder() 3012 ) 3013 3014 if self._match_text_seq("BUCKET"): 3015 bucket_numerator = self._parse_number() 3016 self._match_text_seq("OUT", "OF") 3017 bucket_denominator = bucket_denominator = self._parse_number() 3018 self._match(TokenType.ON) 3019 bucket_field = self._parse_field() 3020 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3021 percent = num 3022 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3023 size = num 3024 else: 3025 percent = num 3026 3027 if matched_l_paren: 3028 self._match_r_paren() 3029 3030 if 
self._match(TokenType.L_PAREN): 3031 method = self._parse_var(upper=True) 3032 seed = self._match(TokenType.COMMA) and self._parse_number() 3033 self._match_r_paren() 3034 elif self._match_texts(("SEED", "REPEATABLE")): 3035 seed = self._parse_wrapped(self._parse_number) 3036 3037 return self.expression( 3038 exp.TableSample, 3039 expressions=expressions, 3040 method=method, 3041 bucket_numerator=bucket_numerator, 3042 bucket_denominator=bucket_denominator, 3043 bucket_field=bucket_field, 3044 percent=percent, 3045 size=size, 3046 seed=seed, 3047 ) 3048 3049 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3050 return list(iter(self._parse_pivot, None)) or None 3051 3052 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 3053 return list(iter(self._parse_join, None)) or None 3054 3055 # https://duckdb.org/docs/sql/statements/pivot 3056 def _parse_simplified_pivot(self) -> exp.Pivot: 3057 def _parse_on() -> t.Optional[exp.Expression]: 3058 this = self._parse_bitwise() 3059 return self._parse_in(this) if self._match(TokenType.IN) else this 3060 3061 this = self._parse_table() 3062 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3063 using = self._match(TokenType.USING) and self._parse_csv( 3064 lambda: self._parse_alias(self._parse_function()) 3065 ) 3066 group = self._parse_group() 3067 return self.expression( 3068 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3069 ) 3070 3071 def _parse_pivot_in(self) -> exp.In: 3072 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3073 this = self._parse_conjunction() 3074 3075 self._match(TokenType.ALIAS) 3076 alias = self._parse_field() 3077 if alias: 3078 return self.expression(exp.PivotAlias, this=this, alias=alias) 3079 3080 return this 3081 3082 value = self._parse_column() 3083 3084 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3085 self.raise_error("Expecting IN (") 3086 3087 aliased_expressions = 
self._parse_csv(_parse_aliased_expression) 3088 3089 self._match_r_paren() 3090 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3091 3092 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3093 index = self._index 3094 include_nulls = None 3095 3096 if self._match(TokenType.PIVOT): 3097 unpivot = False 3098 elif self._match(TokenType.UNPIVOT): 3099 unpivot = True 3100 3101 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3102 if self._match_text_seq("INCLUDE", "NULLS"): 3103 include_nulls = True 3104 elif self._match_text_seq("EXCLUDE", "NULLS"): 3105 include_nulls = False 3106 else: 3107 return None 3108 3109 expressions = [] 3110 3111 if not self._match(TokenType.L_PAREN): 3112 self._retreat(index) 3113 return None 3114 3115 if unpivot: 3116 expressions = self._parse_csv(self._parse_column) 3117 else: 3118 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3119 3120 if not expressions: 3121 self.raise_error("Failed to parse PIVOT's aggregation list") 3122 3123 if not self._match(TokenType.FOR): 3124 self.raise_error("Expecting FOR") 3125 3126 field = self._parse_pivot_in() 3127 3128 self._match_r_paren() 3129 3130 pivot = self.expression( 3131 exp.Pivot, 3132 expressions=expressions, 3133 field=field, 3134 unpivot=unpivot, 3135 include_nulls=include_nulls, 3136 ) 3137 3138 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3139 pivot.set("alias", self._parse_table_alias()) 3140 3141 if not unpivot: 3142 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3143 3144 columns: t.List[exp.Expression] = [] 3145 for fld in pivot.args["field"].expressions: 3146 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3147 for name in names: 3148 if self.PREFIXED_PIVOT_COLUMNS: 3149 name = f"{name}_{field_name}" if name else field_name 3150 else: 3151 name = f"{field_name}_{name}" if name else 
field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the output column names implied by PIVOT aggregations."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause; None when WHERE is absent (unless skipped)."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse a GROUP BY clause, including ALL, GROUPING SETS, [WITH] ROLLUP,
        [WITH] CUBE and WITH TOTALS modifiers; None when GROUP BY is absent."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # WITH ROLLUP takes no column list; plain ROLLUP takes one.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    # The WITH we consumed belongs to some other clause -- put it back.
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not
self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one element of a GROUPING SETS list: a parenthesized tuple of
        columns, or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause; None when HAVING is absent (unless skipped)."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause; None when QUALIFY is absent."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse hierarchical START WITH ... CONNECT BY clauses (either order)."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        # PRIOR is only meaningful inside CONNECT BY, so its parser is
        # registered temporarily while the condition is parsed.
        # NOTE(review): this mutates what looks like a class-level parser table;
        # presumably fine single-threaded -- confirm if parsers are shared.
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_name_as_expression(self) -> exp.Alias:
        """Parse ``name [AS expr]``, with the alias parsed first."""
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse an INTERPOLATE (...) list, or return None."""
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY / ORDER SIBLINGS BY; returns ``this`` unchanged when
        neither is present."""
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a sort-style clause introduced by ``token`` into ``exp_class``."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered:
        """Parse one ordering term:
        ``expr [ASC|DESC] [NULLS FIRST|LAST] [WITH FILL ...]``.

        When nulls aren't ordered explicitly, ``nulls_first`` is derived from
        the dialect's NULL_ORDERING setting.
        """
        this = parse_method() if parse_method else self._parse_conjunction()

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this,
desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when ``top``), or a FETCH FIRST/NEXT clause.

        Handles the ``LIMIT offset, count`` form too. Returns ``this``
        unchanged when none of these clauses is present.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # LIMIT <offset>, <count>
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause; returns ``this`` unchanged when absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a trailing ``BY expr, ...`` list (LIMIT ... BY), if present."""
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse any number of row-locking clauses: FOR UPDATE, FOR SHARE or
        LOCK IN SHARE MODE, each with optional OF <tables> and wait options."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait semantics: True = NOWAIT, False = SKIP LOCKED, expr = WAIT <n>
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold trailing UNION/EXCEPT/INTERSECT operands onto ``this``."""
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            # Set operations default to DISTINCT unless ALL is given explicitly.
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            expression = this.expression

            if expression:
                # Hoist modifiers (e.g. ORDER BY/LIMIT) from the last operand
                # up onto the Union node itself.
                for arg in self.UNION_MODIFIERS:
                    expr =
expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly aliased) scalar expression."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse CONJUNCTION-level binary operators."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse EQUALITY-level binary operators."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse COMPARISON-level binary operators."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (RANGE_PARSERS such as BETWEEN/IN/LIKE,
        plus ISNULL/NOTNULL and IS ...), including an optional leading NOT."""
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: [NOT] DISTINCT FROM / NULL / boolean.

        Retreats and returns None when what follows IS is none of these.
        """
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_conjunction())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of IN: an UNNEST, a parenthesized/bracketed
        (sub)query or value list, or a bare field reference."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def
_parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        """Parse the tail of a BETWEEN predicate: ``low AND high``."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap ``this`` in an Escape node when followed by ESCAPE '<char>'."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL expression, normalized to INTERVAL '<value>' <unit>.

        Retreats and returns None when the following tokens can't form an
        interval (e.g. the next bare identifier is actually the IS keyword).
        """
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g.
a "window side") 3589 unit = None 3590 self._retreat(self._index - 1) 3591 3592 this = exp.Literal.string(parts[0]) 3593 unit = self.expression(exp.Var, this=parts[1].upper()) 3594 3595 return self.expression(exp.Interval, this=this, unit=unit) 3596 3597 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3598 this = self._parse_term() 3599 3600 while True: 3601 if self._match_set(self.BITWISE): 3602 this = self.expression( 3603 self.BITWISE[self._prev.token_type], 3604 this=this, 3605 expression=self._parse_term(), 3606 ) 3607 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3608 this = self.expression( 3609 exp.DPipe, 3610 this=this, 3611 expression=self._parse_term(), 3612 safe=not self.dialect.STRICT_STRING_CONCAT, 3613 ) 3614 elif self._match(TokenType.DQMARK): 3615 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3616 elif self._match_pair(TokenType.LT, TokenType.LT): 3617 this = self.expression( 3618 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3619 ) 3620 elif self._match_pair(TokenType.GT, TokenType.GT): 3621 this = self.expression( 3622 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3623 ) 3624 else: 3625 break 3626 3627 return this 3628 3629 def _parse_term(self) -> t.Optional[exp.Expression]: 3630 return self._parse_tokens(self._parse_factor, self.TERM) 3631 3632 def _parse_factor(self) -> t.Optional[exp.Expression]: 3633 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3634 this = parse_method() 3635 3636 while self._match_set(self.FACTOR): 3637 this = self.expression( 3638 self.FACTOR[self._prev.token_type], 3639 this=this, 3640 comments=self._prev_comments, 3641 expression=parse_method(), 3642 ) 3643 if isinstance(this, exp.Div): 3644 this.args["typed"] = self.dialect.TYPED_DIVISION 3645 this.args["safe"] = self.dialect.SAFE_DIVISION 3646 3647 return this 3648 3649 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3650 
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary-prefixed expressions; otherwise a typed column with an
        optional AT TIME ZONE suffix."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL expression, a ``TYPE 'literal'`` cast shorthand,
        or fall back to a plain column expression."""
        interval = parse_interval and self._parse_interval()
        if interval:
            # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
            while True:
                index = self._index
                self._match(TokenType.PLUS)

                if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
                    self._retreat(index)
                    break

                interval = self.expression(  # type: ignore
                    exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
                )

            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # TYPE 'literal' -- either a dedicated literal parser or a Cast.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name not followed by a literal: treat it as a column.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one parameter inside a type's parentheses, e.g. 10 in DECIMAL(10)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested/parameterized) data type into exp.DataType.

        Handles struct and nested generics (``ARRAY<INT>``), enum and aggregate
        parameter lists, user-defined types, TIMESTAMP/TIME time-zone variants,
        INTERVAL units/spans, UNSIGNED integer variants, and trailing ``[]``
        array suffixes. Retreats and returns None when no type is recognized.
        """
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            # The next token isn't a known type keyword; it may still be an
            # identifier that tokenizes to a type, or a user-defined type.
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if identifier:
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                # Aggregate types take a leading function/identifier argument.
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # A parenthesized form might actually be a function call; decided below.
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))

        if maybe_func and check_func:
            # Only treat TYPE(...) as a type if a string literal follows
            # (i.e. the TYPE(...) 'literal' cast shorthand).
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] suffixes wrap the type in ARRAY, one level per pair.
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one struct field (``name: type`` or a bare type) as a column def."""
        index = self._index
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        column_def = self._parse_column_def(this)

        if type_required and (
            (isinstance(this, exp.Column) and this.this is column_def) or this is column_def
        ):
            # Got a bare identifier where a type is mandatory: re-parse as a type.
            self._retreat(index)
            return self._parse_types()

        return column_def

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap ``this`` in AtTimeZone when followed by AT TIME ZONE."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference together with any trailing column operators."""
        this = self._parse_column_reference()
        return self._parse_column_ops(this) if this else self._parse_bracket(this)

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
3885 not this 3886 and self._match(TokenType.VALUES, advance=False) 3887 and self.VALUES_FOLLOWED_BY_PAREN 3888 and (not self._next or self._next.token_type != TokenType.L_PAREN) 3889 ): 3890 this = self._parse_id_var() 3891 3892 return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this 3893 3894 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3895 this = self._parse_bracket(this) 3896 3897 while self._match_set(self.COLUMN_OPERATORS): 3898 op_token = self._prev.token_type 3899 op = self.COLUMN_OPERATORS.get(op_token) 3900 3901 if op_token == TokenType.DCOLON: 3902 field = self._parse_types() 3903 if not field: 3904 self.raise_error("Expected type") 3905 elif op and self._curr: 3906 field = self._parse_column_reference() 3907 else: 3908 field = self._parse_field(anonymous_func=True, any_token=True) 3909 3910 if isinstance(field, exp.Func): 3911 # bigquery allows function calls like x.y.count(...) 3912 # SAFE.SUBSTR(...) 
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the qualifiers one slot: the previously parsed column
                # name becomes the table of the new field, and so on.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literal, parenthesized expression,
        tuple, or subquery."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals are concatenated ('a' 'b' -> 'ab').
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot decimal, e.g. .25 -> 0.25
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: primary literal, function call, or identifier."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, dispatching to registered parsers/builders."""
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        # Functions that take arguments without parentheses, e.g. EXTRACT-like
        # constructs registered in NO_PAREN_FUNCTION_PARSERS.
        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No parens follow: only zero-argument functions qualify here.
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...), ANY(WITH ...)
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                # Some builder callables also accept the dialect; detect this
                # from their signature.
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Preserve the original spelling of the function name.
                    func.meta["name"] = this

                this = func
            else:
                # Unknown function: keep it as an anonymous call.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a UDF signature: a dotted name plus optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'abc'); falls back to an
        identifier if no literal follows."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            # Qualified form kind.name, e.g. SYSTEM.parameter
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda ((x, y) -> expr or x -> expr); otherwise fall back to
        a DISTINCT list or a plain select/expression argument."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all: rewind and parse a regular argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/constraint list) attached to
        `this`, unless the parens actually wrap a nested select."""
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                # The index is restored in all cases — also on the success
                # path, where `this` is returned with the cursor reset so the
                # select is presumably re-parsed by the caller. NOTE(review):
                # errors collected during the speculative parse are discarded.
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            # Computed column without a declared type: name AS <expr>
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            # Typed column with a transform: name <type> AS (<expr>)
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or
        START ... INCREMENT ... arguments."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            # AUTO without REFRESH is something else; give the token back.
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY | ROW | (<expr>)}
        with its optional sequence options."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            # GENERATED ... AS ROW {START | END} [HIDDEN]
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expr>): the parens wrap an expression.
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare numbers inside the parens: (start [, increment])
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        """Parse the constraint following NOT: NULL, CASESPECIFIC, or
        FOR REPLICATION."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named) column constraint."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table constraint, named (CONSTRAINT <name> ...) or not."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        # A named constraint can carry several constraint bodies.
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Dispatch to the parser registered for the next constraint keyword."""
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect key constraint options (ON DELETE/UPDATE actions, MATCH,
        deferrability, etc.) as plain strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON names the event (e.g. DELETE, UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse REFERENCES <table>(<cols>) with its options; the referenced
        columns are captured by _parse_table(schema=True)."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (<cols>) [REFERENCES ...] [ON DELETE/UPDATE ...]."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-token action, e.g. CASCADE or RESTRICT.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        """Parse PERIOD FOR SYSTEM_TIME (<start_col>, <end_col>)."""
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY, either as a column constraint or, when followed
        by a parenthesized column list, as a table constraint."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse [..] / {..} after `this`: subscripting, array literals, or
        DuckDB-style struct literals."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript: normalize the index by the dialect's INDEX_OFFSET.
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        # Brackets can chain, e.g. x[0][1].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                # "INTERVAL END" was greedily parsed: END actually closes the
                # CASE, and the ELSE value is a column named interval.
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, either as a function call IF(...) or as the statement-like
        IF ... THEN ... [ELSE ...] END form."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # A leading bare IF is treated as a command in such dialects.
                return self._parse_as_command(self._prev)

            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (<order>)]."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(<unit> FROM <expr>) — or the comma-separated variant."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the interior of CAST(<expr> AS <type> [FORMAT <fmt>])."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Snowflake-style CAST(expr, 'type string')
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # A FORMAT cast to a temporal type is really a string-to-time
                # conversion; translate the format via the dialect's mappings.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style calls, including the
        WITHIN GROUP (ORDER BY ...) form."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type)."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk (search, result) pairs; a trailing unpaired arg is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # NULL search values must be compared with IS NULL.
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: match on equality OR both sides NULL,
                # mirroring DECODE's NULL-equals-NULL semantics.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse one KEY <key> VALUE <value> (or key: value) pair."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject:
        ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg:
        ...

    def _parse_json_object(self, agg=False):
        """Parse JSON_OBJECT(...) / JSON_OBJECTAGG(...) arguments and clauses."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse JSON_TABLE(<expr> [, path] [error/empty handling] COLUMNS ...)."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL's MATCH(<cols>) AGAINST(<expr> [modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One WITH-clause column: name, type, optional path, AS JSON flag.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION(needle IN haystack) or the comma-separated variant;
        `haystack_first` flips the argument order for the latter."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse PREDICT(MODEL <model>, TABLE <table> [, params])."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
args.append(self._parse_bitwise()) 4921 4922 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4923 4924 def _parse_trim(self) -> exp.Trim: 4925 # https://www.w3resource.com/sql/character-functions/trim.php 4926 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4927 4928 position = None 4929 collation = None 4930 expression = None 4931 4932 if self._match_texts(self.TRIM_TYPES): 4933 position = self._prev.text.upper() 4934 4935 this = self._parse_bitwise() 4936 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4937 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 4938 expression = self._parse_bitwise() 4939 4940 if invert_order: 4941 this, expression = expression, this 4942 4943 if self._match(TokenType.COLLATE): 4944 collation = self._parse_bitwise() 4945 4946 return self.expression( 4947 exp.Trim, this=this, position=position, expression=expression, collation=collation 4948 ) 4949 4950 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4951 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4952 4953 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4954 return self._parse_window(self._parse_id_var(), alias=True) 4955 4956 def _parse_respect_or_ignore_nulls( 4957 self, this: t.Optional[exp.Expression] 4958 ) -> t.Optional[exp.Expression]: 4959 if self._match_text_seq("IGNORE", "NULLS"): 4960 return self.expression(exp.IgnoreNulls, this=this) 4961 if self._match_text_seq("RESPECT", "NULLS"): 4962 return self.expression(exp.RespectNulls, this=this) 4963 return this 4964 4965 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4966 if self._match(TokenType.HAVING): 4967 self._match_texts(("MAX", "MIN")) 4968 max = self._prev.text.upper() != "MIN" 4969 return self.expression( 4970 exp.HavingMax, this=this, expression=self._parse_column(), max=max 4971 ) 4972 4973 return this 4974 4975 def 
_parse_window( 4976 self, this: t.Optional[exp.Expression], alias: bool = False 4977 ) -> t.Optional[exp.Expression]: 4978 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4979 self._match(TokenType.WHERE) 4980 this = self.expression( 4981 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4982 ) 4983 self._match_r_paren() 4984 4985 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4986 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4987 if self._match_text_seq("WITHIN", "GROUP"): 4988 order = self._parse_wrapped(self._parse_order) 4989 this = self.expression(exp.WithinGroup, this=this, expression=order) 4990 4991 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4992 # Some dialects choose to implement and some do not. 4993 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4994 4995 # There is some code above in _parse_lambda that handles 4996 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4997 4998 # The below changes handle 4999 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5000 5001 # Oracle allows both formats 5002 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5003 # and Snowflake chose to do the same for familiarity 5004 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5005 if isinstance(this, exp.AggFunc): 5006 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5007 5008 if ignore_respect and ignore_respect is not this: 5009 ignore_respect.replace(ignore_respect.this) 5010 this = self.expression(ignore_respect.__class__, this=this) 5011 5012 this = self._parse_respect_or_ignore_nulls(this) 5013 5014 # bigquery select from window x AS (partition by ...) 
        # Window naming / OVER handling. When parsing a named window
        # (WINDOW w AS (...)), there is no OVER keyword to consume; otherwise one
        # of WINDOW_BEFORE_PAREN_TOKENS (e.g. OVER) must follow or this is no window.
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        # Bare window reference: OVER w (no parenthesized specification).
        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        # Optional base window name inside the parens that this spec refines: OVER (w ...).
        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        # FIRST/LAST flag (e.g. Oracle's KEEP (DENSE_RANK FIRST|LAST ...)); LAST clears it.
        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        # Frame kind (ROWS or RANGE); truthy iff a frame specification follows.
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # BETWEEN <spec> AND <spec>; BETWEEN/AND are optional single-bound forms.
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
5068 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5069 return self._parse_window(window, alias=alias) 5070 5071 return window 5072 5073 def _parse_partition_and_order( 5074 self, 5075 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5076 return self._parse_partition_by(), self._parse_order() 5077 5078 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5079 self._match(TokenType.BETWEEN) 5080 5081 return { 5082 "value": ( 5083 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5084 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5085 or self._parse_bitwise() 5086 ), 5087 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5088 } 5089 5090 def _parse_alias( 5091 self, this: t.Optional[exp.Expression], explicit: bool = False 5092 ) -> t.Optional[exp.Expression]: 5093 any_token = self._match(TokenType.ALIAS) 5094 comments = self._prev_comments 5095 5096 if explicit and not any_token: 5097 return this 5098 5099 if self._match(TokenType.L_PAREN): 5100 aliases = self.expression( 5101 exp.Aliases, 5102 comments=comments, 5103 this=this, 5104 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5105 ) 5106 self._match_r_paren(aliases) 5107 return aliases 5108 5109 alias = self._parse_id_var(any_token) or ( 5110 self.STRING_ALIASES and self._parse_string_as_identifier() 5111 ) 5112 5113 if alias: 5114 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5115 column = this.this 5116 5117 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5118 if not this.comments and column and column.comments: 5119 this.comments = column.comments 5120 column.comments = None 5121 5122 return this 5123 5124 def _parse_id_var( 5125 self, 5126 any_token: bool = True, 5127 tokens: t.Optional[t.Collection[TokenType]] = None, 5128 ) -> t.Optional[exp.Expression]: 5129 identifier = self._parse_identifier() 5130 5131 if identifier: 5132 return 
identifier

        # Not an explicit identifier: accept any non-reserved token (if allowed)
        # or any token type from the provided set / ID_VAR_TOKENS.
        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a (raw) string literal token, falling back to a placeholder."""
        if self._match_set((TokenType.STRING, TokenType.RAW_STRING)):
            return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal and wrap its text as a quoted identifier (None if absent)."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal token, falling back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse an IDENTIFIER token (always marked quoted), falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a keyword-like token into an exp.Var.

        Args:
            any_token: accept any non-reserved token.
            tokens: extra token types to accept besides VAR.
            upper: normalize the variable's text to upper case.
        """
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        """Consume and return the current token unless it is reserved (or ignore_reserved)."""
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        # Prefer a var; fall back to a string literal.
        return self._parse_var() or self._parse_string()

    def _parse_null(self) ->
t.Optional[exp.Expression]: 5184 if self._match_set(self.NULL_TOKENS): 5185 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5186 return self._parse_placeholder() 5187 5188 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5189 if self._match(TokenType.TRUE): 5190 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5191 if self._match(TokenType.FALSE): 5192 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5193 return self._parse_placeholder() 5194 5195 def _parse_star(self) -> t.Optional[exp.Expression]: 5196 if self._match(TokenType.STAR): 5197 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5198 return self._parse_placeholder() 5199 5200 def _parse_parameter(self) -> exp.Parameter: 5201 def _parse_parameter_part() -> t.Optional[exp.Expression]: 5202 return ( 5203 self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True) 5204 ) 5205 5206 self._match(TokenType.L_BRACE) 5207 this = _parse_parameter_part() 5208 expression = self._match(TokenType.COLON) and _parse_parameter_part() 5209 self._match(TokenType.R_BRACE) 5210 5211 return self.expression(exp.Parameter, this=this, expression=expression) 5212 5213 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5214 if self._match_set(self.PLACEHOLDER_PARSERS): 5215 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5216 if placeholder: 5217 return placeholder 5218 self._advance(-1) 5219 return None 5220 5221 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5222 if not self._match(TokenType.EXCEPT): 5223 return None 5224 if self._match(TokenType.L_PAREN, advance=False): 5225 return self._parse_wrapped_csv(self._parse_column) 5226 5227 except_column = self._parse_column() 5228 return [except_column] if except_column else None 5229 5230 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5231 if not self._match(TokenType.REPLACE): 5232 return None 5233 if self._match(TokenType.L_PAREN, 
advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-delimited list using `parse_method`; None results are dropped."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach comments preceding the separator to the previous item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a binary operator chain: `expressions` maps token type -> AST class."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a CSV list wrapped in parentheses (parens optional when `optional`)."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; raise when '(' is required but missing."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return
self._parse_select() or self._parse_set_operations( 5292 self._parse_expression() if alias else self._parse_conjunction() 5293 ) 5294 5295 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5296 return self._parse_query_modifiers( 5297 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5298 ) 5299 5300 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5301 this = None 5302 if self._match_texts(self.TRANSACTION_KIND): 5303 this = self._prev.text 5304 5305 self._match_texts(("TRANSACTION", "WORK")) 5306 5307 modes = [] 5308 while True: 5309 mode = [] 5310 while self._match(TokenType.VAR): 5311 mode.append(self._prev.text) 5312 5313 if mode: 5314 modes.append(" ".join(mode)) 5315 if not self._match(TokenType.COMMA): 5316 break 5317 5318 return self.expression(exp.Transaction, this=this, modes=modes) 5319 5320 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5321 chain = None 5322 savepoint = None 5323 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5324 5325 self._match_texts(("TRANSACTION", "WORK")) 5326 5327 if self._match_text_seq("TO"): 5328 self._match_text_seq("SAVEPOINT") 5329 savepoint = self._parse_id_var() 5330 5331 if self._match(TokenType.AND): 5332 chain = not self._match_text_seq("NO") 5333 self._match_text_seq("CHAIN") 5334 5335 if is_rollback: 5336 return self.expression(exp.Rollback, savepoint=savepoint) 5337 5338 return self.expression(exp.Commit, chain=chain) 5339 5340 def _parse_refresh(self) -> exp.Refresh: 5341 self._match(TokenType.TABLE) 5342 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5343 5344 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5345 if not self._match_text_seq("ADD"): 5346 return None 5347 5348 self._match(TokenType.COLUMN) 5349 exists_column = self._parse_exists(not_=True) 5350 expression = self._parse_field_def() 5351 5352 if expression: 5353 expression.set("exists", exists_column) 5354 5355 
# https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5356 if self._match_texts(("FIRST", "AFTER")): 5357 position = self._prev.text 5358 column_position = self.expression( 5359 exp.ColumnPosition, this=self._parse_column(), position=position 5360 ) 5361 expression.set("position", column_position) 5362 5363 return expression 5364 5365 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5366 drop = self._match(TokenType.DROP) and self._parse_drop() 5367 if drop and not isinstance(drop, exp.Command): 5368 drop.set("kind", drop.args.get("kind", "COLUMN")) 5369 return drop 5370 5371 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5372 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5373 return self.expression( 5374 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5375 ) 5376 5377 def _parse_add_constraint(self) -> exp.AddConstraint: 5378 this = None 5379 kind = self._prev.token_type 5380 5381 if kind == TokenType.CONSTRAINT: 5382 this = self._parse_id_var() 5383 5384 if self._match_text_seq("CHECK"): 5385 expression = self._parse_wrapped(self._parse_conjunction) 5386 enforced = self._match_text_seq("ENFORCED") or False 5387 5388 return self.expression( 5389 exp.AddConstraint, this=this, expression=expression, enforced=enforced 5390 ) 5391 5392 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 5393 expression = self._parse_foreign_key() 5394 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 5395 expression = self._parse_primary_key() 5396 else: 5397 expression = None 5398 5399 return self.expression(exp.AddConstraint, this=this, expression=expression) 5400 5401 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5402 index = self._index - 1 5403 5404 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 5405 return self._parse_csv(self._parse_add_constraint) 5406 
5407 self._retreat(index) 5408 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5409 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5410 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5411 5412 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5413 self._match(TokenType.COLUMN) 5414 column = self._parse_field(any_token=True) 5415 5416 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5417 return self.expression(exp.AlterColumn, this=column, drop=True) 5418 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5419 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5420 if self._match(TokenType.COMMENT): 5421 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5422 5423 self._match_text_seq("SET", "DATA") 5424 return self.expression( 5425 exp.AlterColumn, 5426 this=column, 5427 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5428 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5429 using=self._match(TokenType.USING) and self._parse_conjunction(), 5430 ) 5431 5432 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5433 index = self._index - 1 5434 5435 partition_exists = self._parse_exists() 5436 if self._match(TokenType.PARTITION, advance=False): 5437 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5438 5439 self._retreat(index) 5440 return self._parse_csv(self._parse_drop_column) 5441 5442 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5443 if self._match(TokenType.COLUMN): 5444 exists = self._parse_exists() 5445 old_column = self._parse_column() 5446 to = self._match_text_seq("TO") 5447 new_column = self._parse_column() 5448 5449 if old_column is None or to is None or new_column is None: 5450 return None 5451 5452 return self.expression(exp.RenameColumn, this=old_column, to=new_column, 
exists=exists) 5453 5454 self._match_text_seq("TO") 5455 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5456 5457 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5458 start = self._prev 5459 5460 if not self._match(TokenType.TABLE): 5461 return self._parse_as_command(start) 5462 5463 exists = self._parse_exists() 5464 only = self._match_text_seq("ONLY") 5465 this = self._parse_table(schema=True) 5466 5467 if self._next: 5468 self._advance() 5469 5470 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5471 if parser: 5472 actions = ensure_list(parser(self)) 5473 5474 if not self._curr and actions: 5475 return self.expression( 5476 exp.AlterTable, 5477 this=this, 5478 exists=exists, 5479 actions=actions, 5480 only=only, 5481 ) 5482 5483 return self._parse_as_command(start) 5484 5485 def _parse_merge(self) -> exp.Merge: 5486 self._match(TokenType.INTO) 5487 target = self._parse_table() 5488 5489 if target and self._match(TokenType.ALIAS, advance=False): 5490 target.set("alias", self._parse_table_alias()) 5491 5492 self._match(TokenType.USING) 5493 using = self._parse_table() 5494 5495 self._match(TokenType.ON) 5496 on = self._parse_conjunction() 5497 5498 return self.expression( 5499 exp.Merge, 5500 this=target, 5501 using=using, 5502 on=on, 5503 expressions=self._parse_when_matched(), 5504 ) 5505 5506 def _parse_when_matched(self) -> t.List[exp.When]: 5507 whens = [] 5508 5509 while self._match(TokenType.WHEN): 5510 matched = not self._match(TokenType.NOT) 5511 self._match_text_seq("MATCHED") 5512 source = ( 5513 False 5514 if self._match_text_seq("BY", "TARGET") 5515 else self._match_text_seq("BY", "SOURCE") 5516 ) 5517 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5518 5519 self._match(TokenType.THEN) 5520 5521 if self._match(TokenType.INSERT): 5522 _this = self._parse_star() 5523 if _this: 5524 then: t.Optional[exp.Expression] = self.expression(exp.Insert, 
this=_this) 5525 else: 5526 then = self.expression( 5527 exp.Insert, 5528 this=self._parse_value(), 5529 expression=self._match_text_seq("VALUES") and self._parse_value(), 5530 ) 5531 elif self._match(TokenType.UPDATE): 5532 expressions = self._parse_star() 5533 if expressions: 5534 then = self.expression(exp.Update, expressions=expressions) 5535 else: 5536 then = self.expression( 5537 exp.Update, 5538 expressions=self._match(TokenType.SET) 5539 and self._parse_csv(self._parse_equality), 5540 ) 5541 elif self._match(TokenType.DELETE): 5542 then = self.expression(exp.Var, this=self._prev.text) 5543 else: 5544 then = None 5545 5546 whens.append( 5547 self.expression( 5548 exp.When, 5549 matched=matched, 5550 source=source, 5551 condition=condition, 5552 then=then, 5553 ) 5554 ) 5555 return whens 5556 5557 def _parse_show(self) -> t.Optional[exp.Expression]: 5558 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5559 if parser: 5560 return parser(self) 5561 return self._parse_as_command(self._prev) 5562 5563 def _parse_set_item_assignment( 5564 self, kind: t.Optional[str] = None 5565 ) -> t.Optional[exp.Expression]: 5566 index = self._index 5567 5568 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5569 return self._parse_set_transaction(global_=kind == "GLOBAL") 5570 5571 left = self._parse_primary() or self._parse_id_var() 5572 assignment_delimiter = self._match_texts(("=", "TO")) 5573 5574 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5575 self._retreat(index) 5576 return None 5577 5578 right = self._parse_statement() or self._parse_id_var() 5579 this = self.expression(exp.EQ, this=left, expression=right) 5580 5581 return self.expression(exp.SetItem, this=this, kind=kind) 5582 5583 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5584 self._match_text_seq("TRANSACTION") 5585 characteristics = self._parse_csv( 5586 lambda: 
self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5587 ) 5588 return self.expression( 5589 exp.SetItem, 5590 expressions=characteristics, 5591 kind="TRANSACTION", 5592 **{"global": global_}, # type: ignore 5593 ) 5594 5595 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5596 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5597 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5598 5599 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5600 index = self._index 5601 set_ = self.expression( 5602 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5603 ) 5604 5605 if self._curr: 5606 self._retreat(index) 5607 return self._parse_as_command(self._prev) 5608 5609 return set_ 5610 5611 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5612 for option in options: 5613 if self._match_text_seq(*option.split(" ")): 5614 return exp.var(option) 5615 return None 5616 5617 def _parse_as_command(self, start: Token) -> exp.Command: 5618 while self._curr: 5619 self._advance() 5620 text = self._find_sql(start, self._prev) 5621 size = len(start.text) 5622 self._warn_unsupported() 5623 return exp.Command(this=text[:size], expression=text[size:]) 5624 5625 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5626 settings = [] 5627 5628 self._match_l_paren() 5629 kind = self._parse_id_var() 5630 5631 if self._match(TokenType.L_PAREN): 5632 while True: 5633 key = self._parse_id_var() 5634 value = self._parse_primary() 5635 5636 if not key and value is None: 5637 break 5638 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5639 self._match(TokenType.R_PAREN) 5640 5641 self._match_r_paren() 5642 5643 return self.expression( 5644 exp.DictProperty, 5645 this=this, 5646 kind=kind.this if kind else None, 5647 settings=settings, 5648 ) 5649 5650 def _parse_dict_range(self, this: str) -> 
exp.DictRange: 5651 self._match_l_paren() 5652 has_min = self._match_text_seq("MIN") 5653 if has_min: 5654 min = self._parse_var() or self._parse_primary() 5655 self._match_text_seq("MAX") 5656 max = self._parse_var() or self._parse_primary() 5657 else: 5658 max = self._parse_var() or self._parse_primary() 5659 min = exp.Literal.number(0) 5660 self._match_r_paren() 5661 return self.expression(exp.DictRange, this=this, min=min, max=max) 5662 5663 def _parse_comprehension( 5664 self, this: t.Optional[exp.Expression] 5665 ) -> t.Optional[exp.Comprehension]: 5666 index = self._index 5667 expression = self._parse_column() 5668 if not self._match(TokenType.IN): 5669 self._retreat(index - 1) 5670 return None 5671 iterator = self._parse_column() 5672 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5673 return self.expression( 5674 exp.Comprehension, 5675 this=this, 5676 expression=expression, 5677 iterator=iterator, 5678 condition=condition, 5679 ) 5680 5681 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 5682 if self._match(TokenType.HEREDOC_STRING): 5683 return self.expression(exp.Heredoc, this=self._prev.text) 5684 5685 if not self._match_text_seq("$"): 5686 return None 5687 5688 tags = ["$"] 5689 tag_text = None 5690 5691 if self._is_connected(): 5692 self._advance() 5693 tags.append(self._prev.text.upper()) 5694 else: 5695 self.raise_error("No closing $ found") 5696 5697 if tags[-1] != "$": 5698 if self._is_connected() and self._match_text_seq("$"): 5699 tag_text = tags[-1] 5700 tags.append("$") 5701 else: 5702 self.raise_error("No closing $ found") 5703 5704 heredoc_start = self._curr 5705 5706 while self._curr: 5707 if self._match_text_seq(*tags, advance=False): 5708 this = self._find_sql(heredoc_start, self._prev) 5709 self._advance(len(tags)) 5710 return self.expression(exp.Heredoc, this=this, tag=tag_text) 5711 5712 self._advance() 5713 5714 self.raise_error(f"No closing {''.join(tags)} found") 5715 return None 5716 5717 def 
_find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Find a sub-parser keyed by the (possibly multi-word) upcoming tokens.

        Walks `trie` with successive token texts; on a complete match returns
        the callable from `parsers`, otherwise restores the cursor and returns
        None.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        # No complete key matched: rewind to where we started.
        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        """Consume the current token if it has `token_type`; truthy on match."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            # Attach pending comments to `expression`, if one was given.
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        """Like _match, but accepts any token type in the collection `types`."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Match two consecutive token types; advances past both when `advance`."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a '(' token, raising a parse error if absent."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a ')' token, raising a parse error if absent."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        """Match the current token's upper-cased text against a collection of strings."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None

    def
_match_text_seq(self, *texts, advance=True): 5793 index = self._index 5794 for text in texts: 5795 if self._curr and self._curr.text.upper() == text: 5796 self._advance() 5797 else: 5798 self._retreat(index) 5799 return None 5800 5801 if not advance: 5802 self._retreat(index) 5803 5804 return True 5805 5806 @t.overload 5807 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5808 ... 5809 5810 @t.overload 5811 def _replace_columns_with_dots( 5812 self, this: t.Optional[exp.Expression] 5813 ) -> t.Optional[exp.Expression]: 5814 ... 5815 5816 def _replace_columns_with_dots(self, this): 5817 if isinstance(this, exp.Dot): 5818 exp.replace_children(this, self._replace_columns_with_dots) 5819 elif isinstance(this, exp.Column): 5820 exp.replace_children(this, self._replace_columns_with_dots) 5821 table = this.args.get("table") 5822 this = ( 5823 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5824 ) 5825 5826 return this 5827 5828 def _replace_lambda( 5829 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5830 ) -> t.Optional[exp.Expression]: 5831 if not node: 5832 return node 5833 5834 for column in node.find_all(exp.Column): 5835 if column.parts[0].name in lambda_variables: 5836 dot_or_id = column.to_dot() if column.table else column.this 5837 parent = column.parent 5838 5839 while isinstance(parent, exp.Dot): 5840 if not isinstance(parent.parent, exp.Dot): 5841 parent.replace(dot_or_id) 5842 break 5843 parent = parent.parent 5844 else: 5845 if column is node: 5846 node = dot_or_id 5847 else: 5848 column.replace(dot_or_id) 5849 return node
The Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context, in number of characters, to capture from the query string when displaying an error message. Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1032 def __init__( 1033 self, 1034 error_level: t.Optional[ErrorLevel] = None, 1035 error_message_context: int = 100, 1036 max_errors: int = 3, 1037 dialect: DialectType = None, 1038 ): 1039 from sqlglot.dialects import Dialect 1040 1041 self.error_level = error_level or ErrorLevel.IMMEDIATE 1042 self.error_message_context = error_message_context 1043 self.max_errors = max_errors 1044 self.dialect = Dialect.get_or_raise(dialect) 1045 self.reset()
1057 def parse( 1058 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1059 ) -> t.List[t.Optional[exp.Expression]]: 1060 """ 1061 Parses a list of tokens and returns a list of syntax trees, one tree 1062 per parsed SQL statement. 1063 1064 Args: 1065 raw_tokens: The list of tokens. 1066 sql: The original SQL string, used to produce helpful debug messages. 1067 1068 Returns: 1069 The list of the produced syntax trees. 1070 """ 1071 return self._parse( 1072 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1073 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1075 def parse_into( 1076 self, 1077 expression_types: exp.IntoType, 1078 raw_tokens: t.List[Token], 1079 sql: t.Optional[str] = None, 1080 ) -> t.List[t.Optional[exp.Expression]]: 1081 """ 1082 Parses a list of tokens into a given Expression type. If a collection of Expression 1083 types is given instead, this method will try to parse the token list into each one 1084 of them, stopping at the first for which the parsing succeeds. 1085 1086 Args: 1087 expression_types: The expression type(s) to try and parse the token list into. 1088 raw_tokens: The list of tokens. 1089 sql: The original SQL string, used to produce helpful debug messages. 1090 1091 Returns: 1092 The target Expression. 1093 """ 1094 errors = [] 1095 for expression_type in ensure_list(expression_types): 1096 parser = self.EXPRESSION_PARSERS.get(expression_type) 1097 if not parser: 1098 raise TypeError(f"No parser registered for {expression_type}") 1099 1100 try: 1101 return self._parse(parser, raw_tokens, sql) 1102 except ParseError as e: 1103 e.errors[0]["into_expression"] = expression_type 1104 errors.append(e) 1105 1106 raise ParseError( 1107 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1108 errors=merge_errors(errors), 1109 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1146 def check_errors(self) -> None: 1147 """Logs or raises any found errors, depending on the chosen error level setting.""" 1148 if self.error_level == ErrorLevel.WARN: 1149 for error in self.errors: 1150 logger.error(str(error)) 1151 elif self.error_level == ErrorLevel.RAISE and self.errors: 1152 raise ParseError( 1153 concat_messages(self.errors, self.max_errors), 1154 errors=merge_errors(self.errors), 1155 )
Logs or raises any found errors, depending on the chosen error level setting.
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.

        Args:
            message: The error description.
            token: The token the error is anchored to; defaults to the current,
                then previous, token (or an empty token when none exists).
        """
        token = token or self._curr or self._prev or Token.string("")
        # Slice out the query text surrounding the offending token so the
        # message can show `...context [highlighted] context...`.
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # \033[4m / \033[0m underline the highlighted span on ANSI terminals.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        # IMMEDIATE raises right away; other levels accumulate for check_errors().
        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1185 def expression( 1186 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1187 ) -> E: 1188 """ 1189 Creates a new, validated Expression. 1190 1191 Args: 1192 exp_class: The expression class to instantiate. 1193 comments: An optional list of comments to attach to the expression. 1194 kwargs: The arguments to set for the expression along with their respective values. 1195 1196 Returns: 1197 The target expression. 1198 """ 1199 instance = exp_class(**kwargs) 1200 instance.add_comments(comments) if comments else self._add_comments(instance) 1201 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1208 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1209 """ 1210 Validates an Expression, making sure that all its mandatory arguments are set. 1211 1212 Args: 1213 expression: The expression to validate. 1214 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1215 1216 Returns: 1217 The validated expression. 1218 """ 1219 if self.error_level != ErrorLevel.IGNORE: 1220 for error_message in expression.error_messages(args): 1221 self.raise_error(error_message) 1222 1223 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.