sqlglot.parser

from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

logger = logging.getLogger("sqlglot")


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
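
# Illustrative sketch (not part of the module): how the LOG builder above is
# shaped by the dialect. Dialect.get_or_raise is the same lookup Parser.__init__
# uses below (at runtime, import Dialect from sqlglot.dialects); "mysql" is
# only an example choice.
#
#   dialect = Dialect.get_or_raise("mysql")
#   node = build_logarithm([exp.Literal.number(2), exp.Literal.number(32)], dialect)
#   # -> exp.Log; base/expression order follows dialect.LOG_BASE_FIRST, and a
#   #    single argument yields Ln or Log per the parser's LOG_DEFAULTS_TO_LN.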


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
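
    # Usage sketch (illustrative, not part of the class). Tokenizer and
    # ErrorLevel are imported at the top of this module:
    #
    #   sql = "SELECT a FROM tbl"
    #   parser = Parser(error_level=ErrorLevel.RAISE)
    #   expressions = parser.parse(Tokenizer().tokenize(sql), sql)
    #   # -> a list with one syntax tree (exp.Select) per statement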

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.MODEL,
        TokenType.DICTIONARY,
        TokenType.STORAGE_INTEGRATION,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.FOREIGN_KEY,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.COLON_EQ: exp.PropertyEQ,
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }
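
    # Illustrative note: the ARROW entry above is what turns a higher-order
    # function argument such as `x -> x + 1` into an exp.Lambda whose
    # `expressions` hold the bound parameters and whose `this` is the body;
    # _replace_lambda rewrites column references to those parameters. The
    # function name below is hypothetical and the shape is recalled from
    # sqlglot's behavior, so treat it as an approximation:
    #
    #   parse_one("SELECT my_transform(arr, x -> x + 1)")
    #   # -> ... Anonymous(this=my_transform, expressions=[arr, Lambda(...)])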

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
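
    # Illustrative mapping of the operators above, using Postgres-style syntax
    # (AST shapes follow directly from the dict entries):
    #
    #   col -> 'k'     =>  exp.JSONExtract        (JSON document access)
    #   col ->> 'k'    =>  exp.JSONExtractScalar  (text extraction)
    #   col #> '{k}'   =>  exp.JSONBExtract
    #   col #>> '{k}'  =>  exp.JSONBExtractScalar
    #   col ? 'k'      =>  exp.JSONBContains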

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"}

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}
    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
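
    # Illustrative sketch: statements are split on semicolons (see _parse
    # below), so one token list can yield several trees:
    #
    #   sql = "SELECT 1; SELECT 2"
    #   trees = Parser().parse(Tokenizer().tokenize(sql), sql)
    #   # -> len(trees) == 2, one exp.Select per statement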
1095 """ 1096 errors = [] 1097 for expression_type in ensure_list(expression_types): 1098 parser = self.EXPRESSION_PARSERS.get(expression_type) 1099 if not parser: 1100 raise TypeError(f"No parser registered for {expression_type}") 1101 1102 try: 1103 return self._parse(parser, raw_tokens, sql) 1104 except ParseError as e: 1105 e.errors[0]["into_expression"] = expression_type 1106 errors.append(e) 1107 1108 raise ParseError( 1109 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1110 errors=merge_errors(errors), 1111 ) from errors[-1] 1112 1113 def _parse( 1114 self, 1115 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1116 raw_tokens: t.List[Token], 1117 sql: t.Optional[str] = None, 1118 ) -> t.List[t.Optional[exp.Expression]]: 1119 self.reset() 1120 self.sql = sql or "" 1121 1122 total = len(raw_tokens) 1123 chunks: t.List[t.List[Token]] = [[]] 1124 1125 for i, token in enumerate(raw_tokens): 1126 if token.token_type == TokenType.SEMICOLON: 1127 if i < total - 1: 1128 chunks.append([]) 1129 else: 1130 chunks[-1].append(token) 1131 1132 expressions = [] 1133 1134 for tokens in chunks: 1135 self._index = -1 1136 self._tokens = tokens 1137 self._advance() 1138 1139 expressions.append(parse_method(self)) 1140 1141 if self._index < len(self._tokens): 1142 self.raise_error("Invalid expression / Unexpected token") 1143 1144 self.check_errors() 1145 1146 return expressions 1147 1148 def check_errors(self) -> None: 1149 """Logs or raises any found errors, depending on the chosen error level setting.""" 1150 if self.error_level == ErrorLevel.WARN: 1151 for error in self.errors: 1152 logger.error(str(error)) 1153 elif self.error_level == ErrorLevel.RAISE and self.errors: 1154 raise ParseError( 1155 concat_messages(self.errors, self.max_errors), 1156 errors=merge_errors(self.errors), 1157 ) 1158 1159 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1160 """ 1161 Appends an error in the list of recorded errors or raises it, depending on the chosen 1162 error level setting. 1163 """ 1164 token = token or self._curr or self._prev or Token.string("") 1165 start = token.start 1166 end = token.end + 1 1167 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1168 highlight = self.sql[start:end] 1169 end_context = self.sql[end : end + self.error_message_context] 1170 1171 error = ParseError.new( 1172 f"{message}. Line {token.line}, Col: {token.col}.\n" 1173 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1174 description=message, 1175 line=token.line, 1176 col=token.col, 1177 start_context=start_context, 1178 highlight=highlight, 1179 end_context=end_context, 1180 ) 1181 1182 if self.error_level == ErrorLevel.IMMEDIATE: 1183 raise error 1184 1185 self.errors.append(error) 1186 1187 def expression( 1188 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1189 ) -> E: 1190 """ 1191 Creates a new, validated Expression. 1192 1193 Args: 1194 exp_class: The expression class to instantiate. 1195 comments: An optional list of comments to attach to the expression. 1196 kwargs: The arguments to set for the expression along with their respective values. 1197 1198 Returns: 1199 The target expression. 
1200 """ 1201 instance = exp_class(**kwargs) 1202 instance.add_comments(comments) if comments else self._add_comments(instance) 1203 return self.validate_expression(instance) 1204 1205 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1206 if expression and self._prev_comments: 1207 expression.add_comments(self._prev_comments) 1208 self._prev_comments = None 1209 1210 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1211 """ 1212 Validates an Expression, making sure that all its mandatory arguments are set. 1213 1214 Args: 1215 expression: The expression to validate. 1216 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1217 1218 Returns: 1219 The validated expression. 1220 """ 1221 if self.error_level != ErrorLevel.IGNORE: 1222 for error_message in expression.error_messages(args): 1223 self.raise_error(error_message) 1224 1225 return expression 1226 1227 def _find_sql(self, start: Token, end: Token) -> str: 1228 return self.sql[start.start : end.end + 1] 1229 1230 def _is_connected(self) -> bool: 1231 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1232 1233 def _advance(self, times: int = 1) -> None: 1234 self._index += times 1235 self._curr = seq_get(self._tokens, self._index) 1236 self._next = seq_get(self._tokens, self._index + 1) 1237 1238 if self._index > 0: 1239 self._prev = self._tokens[self._index - 1] 1240 self._prev_comments = self._prev.comments 1241 else: 1242 self._prev = None 1243 self._prev_comments = None 1244 1245 def _retreat(self, index: int) -> None: 1246 if index != self._index: 1247 self._advance(index - self._index) 1248 1249 def _warn_unsupported(self) -> None: 1250 if len(self._tokens) <= 1: 1251 return 1252 1253 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1254 # interested in emitting a warning for the one being currently processed. 1255 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1256 1257 logger.warning( 1258 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )
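
    # Illustrative ClickHouse TTL clauses accepted by the method above (the
    # action keywords mirror the branches of _parse_ttl_action; examples
    # adapted from the ClickHouse docs linked above):
    #
    #   TTL d + INTERVAL 1 MONTH DELETE
    #   TTL d + INTERVAL 1 WEEK TO VOLUME 'aaa'
    #   TTL d + INTERVAL 2 WEEK TO DISK 'bbb'
    #   TTL d + INTERVAL 1 MONTH RECOMPRESS CODEC(ZSTD(17))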

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            ),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
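
    # Illustrative sketch (output shape recalled from sqlglot; details may
    # vary by version):
    #
    #   sql = "DROP TABLE IF EXISTS db.t CASCADE"
    #   [drop] = Parser().parse(Tokenizer().tokenize(sql), sql)
    #   # -> exp.Drop(kind='TABLE', exists=True, cascade=True,
    #   #             this=exp.Table(this=t, db=db))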

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return None

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_column() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)
        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )
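    # Illustrative sketch (added commentary, not upstream sqlglot code):
    # _parse_clustered_by covers Hive-style bucketing. Assuming the Hive dialect:
    #
    #   import sqlglot
    #   sql = "CREATE TABLE t (a INT) CLUSTERED BY (a) SORTED BY (a) INTO 4 BUCKETS"
    #   tree = sqlglot.parse_one(sql, read="hive")
    #   print(tree.sql(dialect="hive"))  # round-trips, up to normalization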
    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )
    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )
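    # Illustrative sketch (added commentary, not upstream sqlglot code):
    # _parse_partitioned_of and _parse_partition_bound_spec implement Postgres
    # declarative partitioning. Assuming the Postgres dialect:
    #
    #   import sqlglot
    #   tree = sqlglot.parse_one(
    #       "CREATE TABLE p1 PARTITION OF t FOR VALUES FROM (MINVALUE) TO (10)",
    #       read="postgres",
    #   )
    #   print(tree.sql(dialect="postgres"))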
    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        extended = self._match_text_seq("EXTENDED")
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions
        )

    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )
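    # Illustrative sketch (added commentary, not upstream sqlglot code):
    # _parse_insert produces an exp.Insert whose "this" is the target (or an
    # exp.Directory for Hive's INSERT ... DIRECTORY) and whose "expression" is
    # a VALUES list or a SELECT:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   tree = sqlglot.parse_one("INSERT INTO t (a, b) VALUES (1, 2)")
    #   assert isinstance(tree, exp.Insert)
    #   assert isinstance(tree.expression, exp.Values)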
    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )
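    # Illustrative sketch (added commentary, not upstream sqlglot code):
    # _parse_on_conflict covers both Postgres ON CONFLICT and MySQL's
    # ON DUPLICATE KEY UPDATE. Assuming the Postgres dialect:
    #
    #   import sqlglot
    #   tree = sqlglot.parse_one(
    #       "INSERT INTO t (a) VALUES (1) ON CONFLICT (a) DO NOTHING",
    #       read="postgres",
    #   )
    #   print(tree.args["conflict"])  # exp.OnConflict with nothing=True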
    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )
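    # Illustrative sketch (added commentary, not upstream sqlglot code): both
    # _parse_delete and _parse_update probe for RETURNING twice because dialects
    # disagree on where the clause may appear:
    #
    #   import sqlglot
    #   tree = sqlglot.parse_one(
    #       "DELETE FROM t WHERE x > 1 RETURNING x", read="postgres"
    #   )
    #   print(tree.args["where"], tree.args["returning"])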
    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2, which results in 1 column & 2 rows.
        return self.expression(exp.Tuple, expressions=[self._parse_expression()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # DuckDB supports a leading FROM clause, e.g. FROM x SELECT ...
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this
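    # Illustrative sketch (added commentary, not upstream sqlglot code):
    # _parse_select handles an optional leading WITH, DuckDB's leading FROM,
    # parenthesized selects, and bare VALUES before delegating to
    # _parse_query_modifiers and _parse_set_operations:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   tree = sqlglot.parse_one("SELECT DISTINCT a FROM t LIMIT 5")
    #   assert isinstance(tree, exp.Select)
    #   assert tree.args["distinct"] and tree.args["limit"]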
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )
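    # Illustrative sketch (added commentary, not upstream sqlglot code):
    #
    #   import sqlglot
    #   tree = sqlglot.parse_one("WITH c AS (SELECT 1 AS x) SELECT x FROM c")
    #   cte = tree.args["with"].expressions[0]  # exp.CTE
    #   print(cte.alias)  # c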
    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)
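    # Illustrative sketch (added commentary, not upstream sqlglot code):
    # _parse_join combines an optional method/side/kind prefix with ON or USING
    # conditions; a bare comma also produces an exp.Join:
    #
    #   import sqlglot
    #   tree = sqlglot.parse_one("SELECT * FROM a LEFT JOIN b ON a.id = b.id")
    #   join = tree.args["joins"][0]
    #   print(join.side, join.kind)  # LEFT and an empty kind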
    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_conjunction()
        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            include=include,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False, is_db_reference: bool = False) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema) or ""

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
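    # Illustrative sketch (added commentary, not upstream sqlglot code):
    # _parse_table_parts assigns dotted segments right to left: table first,
    # then database, then catalog:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   table = sqlglot.parse_one("SELECT * FROM c.d.t").find(exp.Table)
    #   print(table.catalog, table.db, table.name)  # c d t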
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this = t.cast(
            exp.Expression,
            bracket
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this
    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)
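    # Illustrative sketch (added commentary, not upstream sqlglot code; the
    # exact alias handling depends on dialect flags such as UNNEST_COLUMN_ONLY):
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   tree = sqlglot.parse_one(
    #       "SELECT x FROM UNNEST([1, 2]) AS x WITH OFFSET AS pos", read="bigquery"
    #   )
    #   unnest = tree.find(exp.Unnest)
    #   print(unnest.args["offset"])  # pos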
    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match_text_seq("VALUES"):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_conjunction()

            self._match(TokenType.ALIAS)
            alias = self._parse_field()
            if alias:
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        aliased_expressions = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=aliased_expressions)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]
    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_name_as_expression(self) -> exp.Alias:
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )
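    # Illustrative sketch (added commentary, not upstream sqlglot code):
    # _parse_group folds plain expressions, GROUPING SETS, ROLLUP and CUBE into
    # a single exp.Group node:
    #
    #   import sqlglot
    #   tree = sqlglot.parse_one("SELECT a, SUM(b) FROM t GROUP BY ROLLUP (a)")
    #   print(tree.args["group"].args["rollup"])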
    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered:
        this = parse_method() if parse_method else self._parse_conjunction()

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)
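    # Illustrative sketch (added commentary, not upstream sqlglot code):
    # _parse_limit accepts both MySQL's LIMIT [offset,] count and the ANSI
    # FETCH {FIRST|NEXT} n ROWS {ONLY|WITH TIES} forms. Note that
    # _parse_query_modifiers later moves a comma offset into its own exp.Offset:
    #
    #   import sqlglot
    #   tree = sqlglot.parse_one("SELECT a FROM t LIMIT 5, 10", read="mysql")
    #   print(tree.args["limit"].expression, tree.args["offset"].expression)  # 10 5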
    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            expression = this.expression

            if expression:
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this
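    # Illustrative sketch (added commentary, not upstream sqlglot code): set
    # operations are parsed left-associatively, threading the running result
    # through `this`:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   tree = sqlglot.parse_one("SELECT 1 UNION SELECT 2 UNION ALL SELECT 3")
    #   assert isinstance(tree, exp.Union) and isinstance(tree.this, exp.Union)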
3494 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3495 if self._match(TokenType.NOTNULL): 3496 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3497 this = self.expression(exp.Not, this=this) 3498 3499 if negate: 3500 this = self.expression(exp.Not, this=this) 3501 3502 if self._match(TokenType.IS): 3503 this = self._parse_is(this) 3504 3505 return this 3506 3507 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3508 index = self._index - 1 3509 negate = self._match(TokenType.NOT) 3510 3511 if self._match_text_seq("DISTINCT", "FROM"): 3512 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3513 return self.expression(klass, this=this, expression=self._parse_conjunction()) 3514 3515 expression = self._parse_null() or self._parse_boolean() 3516 if not expression: 3517 self._retreat(index) 3518 return None 3519 3520 this = self.expression(exp.Is, this=this, expression=expression) 3521 return self.expression(exp.Not, this=this) if negate else this 3522 3523 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3524 unnest = self._parse_unnest(with_alias=False) 3525 if unnest: 3526 this = self.expression(exp.In, this=this, unnest=unnest) 3527 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3528 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3529 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3530 3531 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3532 this = self.expression(exp.In, this=this, query=expressions[0]) 3533 else: 3534 this = self.expression(exp.In, this=this, expressions=expressions) 3535 3536 if matched_l_paren: 3537 self._match_r_paren(this) 3538 elif not self._match(TokenType.R_BRACKET, expression=this): 3539 self.raise_error("Expecting ]") 3540 else: 3541 this = self.expression(exp.In, this=this, field=self._parse_field()) 3542 3543 return this 3544 3545 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3546 low = self._parse_bitwise() 3547 self._match(TokenType.AND) 3548 high = self._parse_bitwise() 3549 return self.expression(exp.Between, this=this, low=low, high=high) 3550 3551 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3552 if not self._match(TokenType.ESCAPE): 3553 return this 3554 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3555 3556 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3557 index = self._index 3558 3559 if not self._match(TokenType.INTERVAL) and match_interval: 3560 return None 3561 3562 if self._match(TokenType.STRING, advance=False): 3563 this = self._parse_primary() 3564 else: 3565 this = self._parse_term() 3566 3567 if not this or ( 3568 isinstance(this, exp.Column) 3569 and not this.table 3570 and not this.this.quoted 3571 and this.name.upper() == "IS" 3572 ): 3573 self._retreat(index) 3574 return None 3575 3576 unit = self._parse_function() or ( 3577 not self._match(TokenType.ALIAS, advance=False) 3578 and self._parse_var(any_token=True, upper=True) 3579 ) 3580 3581 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3582 # each INTERVAL expression into this canonical form so it's easy to transpile 3583 if this and this.is_number: 3584 this = exp.Literal.string(this.name) 3585 elif this and this.is_string: 3586 parts = this.name.split() 3587 3588 if len(parts) == 2: 3589 if unit: 3590 # This 
is not actually a unit, it's something else (e.g. a "window side") 3591 unit = None 3592 self._retreat(self._index - 1) 3593 3594 this = exp.Literal.string(parts[0]) 3595 unit = self.expression(exp.Var, this=parts[1].upper()) 3596 3597 return self.expression(exp.Interval, this=this, unit=unit) 3598 3599 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3600 this = self._parse_term() 3601 3602 while True: 3603 if self._match_set(self.BITWISE): 3604 this = self.expression( 3605 self.BITWISE[self._prev.token_type], 3606 this=this, 3607 expression=self._parse_term(), 3608 ) 3609 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3610 this = self.expression( 3611 exp.DPipe, 3612 this=this, 3613 expression=self._parse_term(), 3614 safe=not self.dialect.STRICT_STRING_CONCAT, 3615 ) 3616 elif self._match(TokenType.DQMARK): 3617 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3618 elif self._match_pair(TokenType.LT, TokenType.LT): 3619 this = self.expression( 3620 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3621 ) 3622 elif self._match_pair(TokenType.GT, TokenType.GT): 3623 this = self.expression( 3624 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3625 ) 3626 else: 3627 break 3628 3629 return this 3630 3631 def _parse_term(self) -> t.Optional[exp.Expression]: 3632 return self._parse_tokens(self._parse_factor, self.TERM) 3633 3634 def _parse_factor(self) -> t.Optional[exp.Expression]: 3635 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3636 this = parse_method() 3637 3638 while self._match_set(self.FACTOR): 3639 this = self.expression( 3640 self.FACTOR[self._prev.token_type], 3641 this=this, 3642 comments=self._prev_comments, 3643 expression=parse_method(), 3644 ) 3645 if isinstance(this, exp.Div): 3646 this.args["typed"] = self.dialect.TYPED_DIVISION 3647 this.args["safe"] = self.dialect.SAFE_DIVISION 3648 3649 return this 3650 3651 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3652 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3653 3654 def _parse_unary(self) -> t.Optional[exp.Expression]: 3655 if self._match_set(self.UNARY_PARSERS): 3656 return self.UNARY_PARSERS[self._prev.token_type](self) 3657 return self._parse_at_time_zone(self._parse_type()) 3658 3659 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3660 interval = parse_interval and self._parse_interval() 3661 if interval: 3662 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 3663 while True: 3664 index = self._index 3665 self._match(TokenType.PLUS) 3666 3667 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 3668 self._retreat(index) 3669 break 3670 3671 interval = self.expression( # type: ignore 3672 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 3673 ) 3674 3675 return interval 3676 3677 index = self._index 3678 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3679 this = self._parse_column() 3680 3681 if data_type: 3682 if isinstance(this, exp.Literal): 3683 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3684 if parser: 3685 return parser(self, this, data_type) 3686 return self.expression(exp.Cast, this=this, to=data_type) 3687 if not data_type.expressions: 3688 self._retreat(index) 3689 return self._parse_column() 3690 return self._parse_column_ops(data_type) 3691 3692 return this and self._parse_column_ops(this) 3693 3694 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3695 this = self._parse_type() 3696 if not this: 3697 return None 3698 3699 return self.expression( 3700 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3701 ) 3702 3703 def _parse_types( 3704 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3705 ) -> t.Optional[exp.Expression]: 3706 index = self._index 3707 3708 prefix = self._match_text_seq("SYSUDTLIB", ".") 3709 3710 if not self._match_set(self.TYPE_TOKENS): 3711 identifier = allow_identifiers and self._parse_id_var( 3712 any_token=False, tokens=(TokenType.VAR,) 3713 ) 3714 if identifier: 3715 tokens = self.dialect.tokenize(identifier.name) 3716 3717 if len(tokens) != 1: 3718 self.raise_error("Unexpected identifier", self._prev) 3719 3720 if tokens[0].token_type in self.TYPE_TOKENS: 3721 self._prev = tokens[0] 3722 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 3723 type_name = identifier.name 3724 3725 while self._match(TokenType.DOT): 3726 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3727 3728 return exp.DataType.build(type_name, udt=True) 3729 else: 3730 self._retreat(self._index - 1) 3731 return None 3732 else: 3733 return None 3734 3735 type_token = self._prev.token_type 3736 3737 if type_token == TokenType.PSEUDO_TYPE: 3738 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 3739 3740 if type_token == TokenType.OBJECT_IDENTIFIER: 3741 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 3742 3743 nested = type_token in self.NESTED_TYPE_TOKENS 3744 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3745 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 3746 expressions = None 3747 maybe_func = False 3748 3749 if self._match(TokenType.L_PAREN): 3750 if is_struct: 3751 expressions = self._parse_csv(self._parse_struct_types) 3752 elif nested: 3753 expressions = self._parse_csv( 3754 lambda: self._parse_types( 3755 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3756 ) 3757 ) 3758 elif type_token in self.ENUM_TYPE_TOKENS: 3759 expressions = self._parse_csv(self._parse_equality) 3760 elif is_aggregate: 3761 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 3762 any_token=False, tokens=(TokenType.VAR,) 3763 ) 3764 if not func_or_ident or not self._match(TokenType.COMMA): 3765 return None 3766 expressions = self._parse_csv( 3767 lambda: self._parse_types( 3768 check_func=check_func, schema=schema, 
allow_identifiers=allow_identifiers 3769 ) 3770 ) 3771 expressions.insert(0, func_or_ident) 3772 else: 3773 expressions = self._parse_csv(self._parse_type_size) 3774 3775 if not expressions or not self._match(TokenType.R_PAREN): 3776 self._retreat(index) 3777 return None 3778 3779 maybe_func = True 3780 3781 this: t.Optional[exp.Expression] = None 3782 values: t.Optional[t.List[exp.Expression]] = None 3783 3784 if nested and self._match(TokenType.LT): 3785 if is_struct: 3786 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 3787 else: 3788 expressions = self._parse_csv( 3789 lambda: self._parse_types( 3790 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3791 ) 3792 ) 3793 3794 if not self._match(TokenType.GT): 3795 self.raise_error("Expecting >") 3796 3797 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3798 values = self._parse_csv(self._parse_conjunction) 3799 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3800 3801 if type_token in self.TIMESTAMPS: 3802 if self._match_text_seq("WITH", "TIME", "ZONE"): 3803 maybe_func = False 3804 tz_type = ( 3805 exp.DataType.Type.TIMETZ 3806 if type_token in self.TIMES 3807 else exp.DataType.Type.TIMESTAMPTZ 3808 ) 3809 this = exp.DataType(this=tz_type, expressions=expressions) 3810 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3811 maybe_func = False 3812 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3813 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3814 maybe_func = False 3815 elif type_token == TokenType.INTERVAL: 3816 unit = self._parse_var() 3817 3818 if self._match_text_seq("TO"): 3819 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3820 else: 3821 span = None 3822 3823 if span or not unit: 3824 this = self.expression( 3825 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3826 ) 3827 else: 3828 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 3829 3830 if maybe_func and check_func: 3831 index2 = self._index 3832 peek = self._parse_string() 3833 3834 if not peek: 3835 self._retreat(index) 3836 return None 3837 3838 self._retreat(index2) 3839 3840 if not this: 3841 if self._match_text_seq("UNSIGNED"): 3842 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3843 if not unsigned_type_token: 3844 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3845 3846 type_token = unsigned_type_token or type_token 3847 3848 this = exp.DataType( 3849 this=exp.DataType.Type[type_token.value], 3850 expressions=expressions, 3851 nested=nested, 3852 values=values, 3853 prefix=prefix, 3854 ) 3855 3856 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3857 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3858 3859 return this 3860 3861 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 3862 index = self._index 3863 this = self._parse_type(parse_interval=False) or self._parse_id_var() 3864 self._match(TokenType.COLON) 3865 column_def = self._parse_column_def(this) 3866 3867 if type_required and ( 3868 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 3869 ): 3870 self._retreat(index) 3871 return self._parse_types() 3872 3873 return column_def 3874 3875 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3876 if not self._match_text_seq("AT", "TIME", "ZONE"): 3877 
return this 3878 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3879 3880 def _parse_column(self) -> t.Optional[exp.Expression]: 3881 this = self._parse_column_reference() 3882 return self._parse_column_ops(this) if this else self._parse_bracket(this) 3883 3884 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 3885 this = self._parse_field() 3886 if ( 3887 not this 3888 and self._match(TokenType.VALUES, advance=False) 3889 and self.VALUES_FOLLOWED_BY_PAREN 3890 and (not self._next or self._next.token_type != TokenType.L_PAREN) 3891 ): 3892 this = self._parse_id_var() 3893 3894 return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this 3895 3896 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3897 this = self._parse_bracket(this) 3898 3899 while self._match_set(self.COLUMN_OPERATORS): 3900 op_token = self._prev.token_type 3901 op = self.COLUMN_OPERATORS.get(op_token) 3902 3903 if op_token == TokenType.DCOLON: 3904 field = self._parse_types() 3905 if not field: 3906 self.raise_error("Expected type") 3907 elif op and self._curr: 3908 field = self._parse_column_reference() 3909 else: 3910 field = self._parse_field(anonymous_func=True, any_token=True) 3911 3912 if isinstance(field, exp.Func): 3913 # bigquery allows function calls like x.y.count(...) 3914 # SAFE.SUBSTR(...) 3915 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3916 this = self._replace_columns_with_dots(this) 3917 3918 if op: 3919 this = op(self, this, field) 3920 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3921 this = self.expression( 3922 exp.Column, 3923 this=field, 3924 table=this.this, 3925 db=this.args.get("table"), 3926 catalog=this.args.get("db"), 3927 ) 3928 else: 3929 this = self.expression(exp.Dot, this=this, expression=field) 3930 this = self._parse_bracket(this) 3931 return this 3932 3933 def _parse_primary(self) -> t.Optional[exp.Expression]: 3934 if self._match_set(self.PRIMARY_PARSERS): 3935 token_type = self._prev.token_type 3936 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3937 3938 if token_type == TokenType.STRING: 3939 expressions = [primary] 3940 while self._match(TokenType.STRING): 3941 expressions.append(exp.Literal.string(self._prev.text)) 3942 3943 if len(expressions) > 1: 3944 return self.expression(exp.Concat, expressions=expressions) 3945 3946 return primary 3947 3948 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3949 return exp.Literal.number(f"0.{self._prev.text}") 3950 3951 if self._match(TokenType.L_PAREN): 3952 comments = self._prev_comments 3953 query = self._parse_select() 3954 3955 if query: 3956 expressions = [query] 3957 else: 3958 expressions = self._parse_expressions() 3959 3960 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3961 3962 if isinstance(this, exp.Subqueryable): 3963 this = self._parse_set_operations( 3964 self._parse_subquery(this=this, parse_alias=False) 3965 ) 3966 elif len(expressions) > 1: 3967 this = self.expression(exp.Tuple, expressions=expressions) 3968 else: 3969 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3970 3971 if this: 3972 this.add_comments(comments) 3973 3974 self._match_r_paren(expression=this) 3975 return this 3976 3977 return None 3978 3979 def _parse_field( 3980 self, 3981 any_token: bool = False, 3982 tokens: t.Optional[t.Collection[TokenType]] = None, 3983 anonymous_func: bool = False, 3984 ) 
-> t.Optional[exp.Expression]: 3985 return ( 3986 self._parse_primary() 3987 or self._parse_function(anonymous=anonymous_func) 3988 or self._parse_id_var(any_token=any_token, tokens=tokens) 3989 ) 3990 3991 def _parse_function( 3992 self, 3993 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3994 anonymous: bool = False, 3995 optional_parens: bool = True, 3996 ) -> t.Optional[exp.Expression]: 3997 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 3998 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 3999 fn_syntax = False 4000 if ( 4001 self._match(TokenType.L_BRACE, advance=False) 4002 and self._next 4003 and self._next.text.upper() == "FN" 4004 ): 4005 self._advance(2) 4006 fn_syntax = True 4007 4008 func = self._parse_function_call( 4009 functions=functions, anonymous=anonymous, optional_parens=optional_parens 4010 ) 4011 4012 if fn_syntax: 4013 self._match(TokenType.R_BRACE) 4014 4015 return func 4016 4017 def _parse_function_call( 4018 self, 4019 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4020 anonymous: bool = False, 4021 optional_parens: bool = True, 4022 ) -> t.Optional[exp.Expression]: 4023 if not self._curr: 4024 return None 4025 4026 comments = self._curr.comments 4027 token_type = self._curr.token_type 4028 this = self._curr.text 4029 upper = this.upper() 4030 4031 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4032 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4033 self._advance() 4034 return parser(self) 4035 4036 if not self._next or self._next.token_type != TokenType.L_PAREN: 4037 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4038 self._advance() 4039 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4040 4041 return None 4042 4043 if token_type not in self.FUNC_TOKENS: 4044 return None 4045 4046 self._advance(2) 4047 4048 parser = self.FUNCTION_PARSERS.get(upper) 4049 if parser and not anonymous: 4050 this = parser(self) 4051 else: 4052 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4053 4054 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4055 this = self.expression(subquery_predicate, this=self._parse_select()) 4056 self._match_r_paren() 4057 return this 4058 4059 if functions is None: 4060 functions = self.FUNCTIONS 4061 4062 function = functions.get(upper) 4063 4064 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4065 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4066 4067 if function and not anonymous: 4068 if "dialect" in function.__code__.co_varnames: 4069 func = function(args, dialect=self.dialect) 4070 else: 4071 func = function(args) 4072 4073 func = self.validate_expression(func, args) 4074 if not self.dialect.NORMALIZE_FUNCTIONS: 4075 func.meta["name"] = this 4076 4077 this = func 4078 else: 4079 this = self.expression(exp.Anonymous, this=this, expressions=args) 4080 4081 if isinstance(this, exp.Expression): 4082 this.add_comments(comments) 4083 4084 self._match_r_paren(this) 4085 return self._parse_window(this) 4086 4087 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4088 return self._parse_column_def(self._parse_id_var()) 4089 4090 def _parse_user_defined_function( 4091 self, kind: t.Optional[TokenType] = None 4092 ) -> t.Optional[exp.Expression]: 4093 this = self._parse_id_var() 4094 4095 while self._match(TokenType.DOT): 4096 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4097 4098 if not 
self._match(TokenType.L_PAREN): 4099 return this 4100 4101 expressions = self._parse_csv(self._parse_function_parameter) 4102 self._match_r_paren() 4103 return self.expression( 4104 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4105 ) 4106 4107 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4108 literal = self._parse_primary() 4109 if literal: 4110 return self.expression(exp.Introducer, this=token.text, expression=literal) 4111 4112 return self.expression(exp.Identifier, this=token.text) 4113 4114 def _parse_session_parameter(self) -> exp.SessionParameter: 4115 kind = None 4116 this = self._parse_id_var() or self._parse_primary() 4117 4118 if this and self._match(TokenType.DOT): 4119 kind = this.name 4120 this = self._parse_var() or self._parse_primary() 4121 4122 return self.expression(exp.SessionParameter, this=this, kind=kind) 4123 4124 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4125 index = self._index 4126 4127 if self._match(TokenType.L_PAREN): 4128 expressions = t.cast( 4129 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4130 ) 4131 4132 if not self._match(TokenType.R_PAREN): 4133 self._retreat(index) 4134 else: 4135 expressions = [self._parse_id_var()] 4136 4137 if self._match_set(self.LAMBDAS): 4138 return self.LAMBDAS[self._prev.token_type](self, expressions) 4139 4140 self._retreat(index) 4141 4142 this: t.Optional[exp.Expression] 4143 4144 if self._match(TokenType.DISTINCT): 4145 this = self.expression( 4146 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4147 ) 4148 else: 4149 this = self._parse_select_or_expression(alias=alias) 4150 4151 return self._parse_limit( 4152 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4153 ) 4154 4155 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4156 index = self._index 4157 4158 if not self.errors: 4159 try: 4160 if self._parse_select(nested=True): 4161 return this 4162 except ParseError: 4163 pass 4164 finally: 4165 self.errors.clear() 4166 self._retreat(index) 4167 4168 if not self._match(TokenType.L_PAREN): 4169 return this 4170 4171 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4172 4173 self._match_r_paren() 4174 return self.expression(exp.Schema, this=this, expressions=args) 4175 4176 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4177 return self._parse_column_def(self._parse_field(any_token=True)) 4178 4179 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4180 # column defs are not really columns, they're identifiers 4181 if isinstance(this, exp.Column): 4182 this = this.this 4183 4184 kind = self._parse_types(schema=True) 4185 4186 if self._match_text_seq("FOR", "ORDINALITY"): 4187 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4188 4189 constraints: t.List[exp.Expression] = [] 4190 4191 if not kind and self._match(TokenType.ALIAS): 4192 constraints.append( 4193 self.expression( 4194 exp.ComputedColumnConstraint, 4195 this=self._parse_conjunction(), 4196 persisted=self._match_text_seq("PERSISTED"), 4197 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4198 ) 4199 ) 4200 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4201 self._match(TokenType.ALIAS) 4202 constraints.append( 4203 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4204 ) 4205 
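# Illustrative aside (added commentary, not part of the original source): the
# loop below consumes any remaining column constraints greedily, so a column
# definition such as `id INT NOT NULL PRIMARY KEY` ends up as a single
# exp.ColumnDef whose constraints can be inspected through the public API:
#
#     import sqlglot
#     from sqlglot import exp
#
#     ddl = sqlglot.parse_one("CREATE TABLE t (id INT NOT NULL PRIMARY KEY)")
#     col = ddl.find(exp.ColumnDef)
#     print([c.sql() for c in col.args.get("constraints", [])])
#     # roughly: ['NOT NULL', 'PRIMARY KEY']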
4206 while True: 4207 constraint = self._parse_column_constraint() 4208 if not constraint: 4209 break 4210 constraints.append(constraint) 4211 4212 if not kind and not constraints: 4213 return this 4214 4215 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4216 4217 def _parse_auto_increment( 4218 self, 4219 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4220 start = None 4221 increment = None 4222 4223 if self._match(TokenType.L_PAREN, advance=False): 4224 args = self._parse_wrapped_csv(self._parse_bitwise) 4225 start = seq_get(args, 0) 4226 increment = seq_get(args, 1) 4227 elif self._match_text_seq("START"): 4228 start = self._parse_bitwise() 4229 self._match_text_seq("INCREMENT") 4230 increment = self._parse_bitwise() 4231 4232 if start and increment: 4233 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4234 4235 return exp.AutoIncrementColumnConstraint() 4236 4237 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4238 if not self._match_text_seq("REFRESH"): 4239 self._retreat(self._index - 1) 4240 return None 4241 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4242 4243 def _parse_compress(self) -> exp.CompressColumnConstraint: 4244 if self._match(TokenType.L_PAREN, advance=False): 4245 return self.expression( 4246 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4247 ) 4248 4249 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4250 4251 def _parse_generated_as_identity( 4252 self, 4253 ) -> ( 4254 exp.GeneratedAsIdentityColumnConstraint 4255 | exp.ComputedColumnConstraint 4256 | exp.GeneratedAsRowColumnConstraint 4257 ): 4258 if self._match_text_seq("BY", "DEFAULT"): 4259 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4260 this = self.expression( 4261 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4262 ) 4263 else: 4264 self._match_text_seq("ALWAYS") 4265 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4266 4267 self._match(TokenType.ALIAS) 4268 4269 if self._match_text_seq("ROW"): 4270 start = self._match_text_seq("START") 4271 if not start: 4272 self._match(TokenType.END) 4273 hidden = self._match_text_seq("HIDDEN") 4274 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4275 4276 identity = self._match_text_seq("IDENTITY") 4277 4278 if self._match(TokenType.L_PAREN): 4279 if self._match(TokenType.START_WITH): 4280 this.set("start", self._parse_bitwise()) 4281 if self._match_text_seq("INCREMENT", "BY"): 4282 this.set("increment", self._parse_bitwise()) 4283 if self._match_text_seq("MINVALUE"): 4284 this.set("minvalue", self._parse_bitwise()) 4285 if self._match_text_seq("MAXVALUE"): 4286 this.set("maxvalue", self._parse_bitwise()) 4287 4288 if self._match_text_seq("CYCLE"): 4289 this.set("cycle", True) 4290 elif self._match_text_seq("NO", "CYCLE"): 4291 this.set("cycle", False) 4292 4293 if not identity: 4294 this.set("expression", self._parse_bitwise()) 4295 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4296 args = self._parse_csv(self._parse_bitwise) 4297 this.set("start", seq_get(args, 0)) 4298 this.set("increment", seq_get(args, 1)) 4299 4300 self._match_r_paren() 4301 4302 return this 4303 4304 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4305 self._match_text_seq("LENGTH") 4306 return 
self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4307 4308 def _parse_not_constraint( 4309 self, 4310 ) -> t.Optional[exp.Expression]: 4311 if self._match_text_seq("NULL"): 4312 return self.expression(exp.NotNullColumnConstraint) 4313 if self._match_text_seq("CASESPECIFIC"): 4314 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4315 if self._match_text_seq("FOR", "REPLICATION"): 4316 return self.expression(exp.NotForReplicationColumnConstraint) 4317 return None 4318 4319 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4320 if self._match(TokenType.CONSTRAINT): 4321 this = self._parse_id_var() 4322 else: 4323 this = None 4324 4325 if self._match_texts(self.CONSTRAINT_PARSERS): 4326 return self.expression( 4327 exp.ColumnConstraint, 4328 this=this, 4329 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4330 ) 4331 4332 return this 4333 4334 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4335 if not self._match(TokenType.CONSTRAINT): 4336 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4337 4338 this = self._parse_id_var() 4339 expressions = [] 4340 4341 while True: 4342 constraint = self._parse_unnamed_constraint() or self._parse_function() 4343 if not constraint: 4344 break 4345 expressions.append(constraint) 4346 4347 return self.expression(exp.Constraint, this=this, expressions=expressions) 4348 4349 def _parse_unnamed_constraint( 4350 self, constraints: t.Optional[t.Collection[str]] = None 4351 ) -> t.Optional[exp.Expression]: 4352 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4353 constraints or self.CONSTRAINT_PARSERS 4354 ): 4355 return None 4356 4357 constraint = self._prev.text.upper() 4358 if constraint not in self.CONSTRAINT_PARSERS: 4359 self.raise_error(f"No parser found for schema constraint {constraint}.") 4360 4361 return self.CONSTRAINT_PARSERS[constraint](self) 4362 4363 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4364 self._match_text_seq("KEY") 4365 return self.expression( 4366 exp.UniqueColumnConstraint, 4367 this=self._parse_schema(self._parse_id_var(any_token=False)), 4368 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4369 ) 4370 4371 def _parse_key_constraint_options(self) -> t.List[str]: 4372 options = [] 4373 while True: 4374 if not self._curr: 4375 break 4376 4377 if self._match(TokenType.ON): 4378 action = None 4379 on = self._advance_any() and self._prev.text 4380 4381 if self._match_text_seq("NO", "ACTION"): 4382 action = "NO ACTION" 4383 elif self._match_text_seq("CASCADE"): 4384 action = "CASCADE" 4385 elif self._match_text_seq("RESTRICT"): 4386 action = "RESTRICT" 4387 elif self._match_pair(TokenType.SET, TokenType.NULL): 4388 action = "SET NULL" 4389 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4390 action = "SET DEFAULT" 4391 else: 4392 self.raise_error("Invalid key constraint") 4393 4394 options.append(f"ON {on} {action}") 4395 elif self._match_text_seq("NOT", "ENFORCED"): 4396 options.append("NOT ENFORCED") 4397 elif self._match_text_seq("DEFERRABLE"): 4398 options.append("DEFERRABLE") 4399 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4400 options.append("INITIALLY DEFERRED") 4401 elif self._match_text_seq("NORELY"): 4402 options.append("NORELY") 4403 elif self._match_text_seq("MATCH", "FULL"): 4404 options.append("MATCH FULL") 4405 else: 4406 break 4407 4408 return options 4409 4410 def _parse_references(self, match: bool = True) -> 
t.Optional[exp.Reference]: 4411 if match and not self._match(TokenType.REFERENCES): 4412 return None 4413 4414 expressions = None 4415 this = self._parse_table(schema=True) 4416 options = self._parse_key_constraint_options() 4417 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4418 4419 def _parse_foreign_key(self) -> exp.ForeignKey: 4420 expressions = self._parse_wrapped_id_vars() 4421 reference = self._parse_references() 4422 options = {} 4423 4424 while self._match(TokenType.ON): 4425 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4426 self.raise_error("Expected DELETE or UPDATE") 4427 4428 kind = self._prev.text.lower() 4429 4430 if self._match_text_seq("NO", "ACTION"): 4431 action = "NO ACTION" 4432 elif self._match(TokenType.SET): 4433 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4434 action = "SET " + self._prev.text.upper() 4435 else: 4436 self._advance() 4437 action = self._prev.text.upper() 4438 4439 options[kind] = action 4440 4441 return self.expression( 4442 exp.ForeignKey, 4443 expressions=expressions, 4444 reference=reference, 4445 **options, # type: ignore 4446 ) 4447 4448 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4449 return self._parse_field() 4450 4451 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4452 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4453 self._retreat(self._index - 1) 4454 return None 4455 4456 id_vars = self._parse_wrapped_id_vars() 4457 return self.expression( 4458 exp.PeriodForSystemTimeConstraint, 4459 this=seq_get(id_vars, 0), 4460 expression=seq_get(id_vars, 1), 4461 ) 4462 4463 def _parse_primary_key( 4464 self, wrapped_optional: bool = False, in_props: bool = False 4465 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4466 desc = ( 4467 self._match_set((TokenType.ASC, TokenType.DESC)) 4468 and self._prev.token_type == TokenType.DESC 4469 ) 4470 4471 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4472 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4473 4474 expressions = self._parse_wrapped_csv( 4475 self._parse_primary_key_part, optional=wrapped_optional 4476 ) 4477 options = self._parse_key_constraint_options() 4478 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4479 4480 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4481 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4482 4483 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4484 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4485 return this 4486 4487 bracket_kind = self._prev.token_type 4488 expressions = self._parse_csv( 4489 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4490 ) 4491 4492 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4493 self.raise_error("Expected ]") 4494 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4495 self.raise_error("Expected }") 4496 4497 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4498 if bracket_kind == TokenType.L_BRACE: 4499 this = self.expression(exp.Struct, expressions=expressions) 4500 elif not this or this.name.upper() == "ARRAY": 4501 this = self.expression(exp.Array, expressions=expressions) 4502 else: 4503 expressions = apply_index_offset(this, expressions, 
-self.dialect.INDEX_OFFSET) 4504 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4505 4506 self._add_comments(this) 4507 return self._parse_bracket(this) 4508 4509 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4510 if self._match(TokenType.COLON): 4511 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4512 return this 4513 4514 def _parse_case(self) -> t.Optional[exp.Expression]: 4515 ifs = [] 4516 default = None 4517 4518 comments = self._prev_comments 4519 expression = self._parse_conjunction() 4520 4521 while self._match(TokenType.WHEN): 4522 this = self._parse_conjunction() 4523 self._match(TokenType.THEN) 4524 then = self._parse_conjunction() 4525 ifs.append(self.expression(exp.If, this=this, true=then)) 4526 4527 if self._match(TokenType.ELSE): 4528 default = self._parse_conjunction() 4529 4530 if not self._match(TokenType.END): 4531 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4532 default = exp.column("interval") 4533 else: 4534 self.raise_error("Expected END after CASE", self._prev) 4535 4536 return self._parse_window( 4537 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4538 ) 4539 4540 def _parse_if(self) -> t.Optional[exp.Expression]: 4541 if self._match(TokenType.L_PAREN): 4542 args = self._parse_csv(self._parse_conjunction) 4543 this = self.validate_expression(exp.If.from_arg_list(args), args) 4544 self._match_r_paren() 4545 else: 4546 index = self._index - 1 4547 4548 if self.NO_PAREN_IF_COMMANDS and index == 0: 4549 return self._parse_as_command(self._prev) 4550 4551 condition = self._parse_conjunction() 4552 4553 if not condition: 4554 self._retreat(index) 4555 return None 4556 4557 self._match(TokenType.THEN) 4558 true = self._parse_conjunction() 4559 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4560 self._match(TokenType.END) 4561 this = self.expression(exp.If, this=condition, true=true, false=false) 4562 4563 return self._parse_window(this) 4564 4565 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4566 if not self._match_text_seq("VALUE", "FOR"): 4567 self._retreat(self._index - 1) 4568 return None 4569 4570 return self.expression( 4571 exp.NextValueFor, 4572 this=self._parse_column(), 4573 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4574 ) 4575 4576 def _parse_extract(self) -> exp.Extract: 4577 this = self._parse_function() or self._parse_var() or self._parse_type() 4578 4579 if self._match(TokenType.FROM): 4580 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4581 4582 if not self._match(TokenType.COMMA): 4583 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4584 4585 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4586 4587 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4588 this = self._parse_conjunction() 4589 4590 if not self._match(TokenType.ALIAS): 4591 if self._match(TokenType.COMMA): 4592 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4593 4594 self.raise_error("Expected AS after CAST") 4595 4596 fmt = None 4597 to = self._parse_types() 4598 4599 if self._match(TokenType.FORMAT): 4600 fmt_string = self._parse_string() 4601 fmt = self._parse_at_time_zone(fmt_string) 4602 4603 if not to: 4604 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4605 if to.this in 
exp.DataType.TEMPORAL_TYPES: 4606 this = self.expression( 4607 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4608 this=this, 4609 format=exp.Literal.string( 4610 format_time( 4611 fmt_string.this if fmt_string else "", 4612 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 4613 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 4614 ) 4615 ), 4616 ) 4617 4618 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4619 this.set("zone", fmt.args["zone"]) 4620 return this 4621 elif not to: 4622 self.raise_error("Expected TYPE after CAST") 4623 elif isinstance(to, exp.Identifier): 4624 to = exp.DataType.build(to.name, udt=True) 4625 elif to.this == exp.DataType.Type.CHAR: 4626 if self._match(TokenType.CHARACTER_SET): 4627 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4628 4629 return self.expression( 4630 exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe 4631 ) 4632 4633 def _parse_string_agg(self) -> exp.Expression: 4634 if self._match(TokenType.DISTINCT): 4635 args: t.List[t.Optional[exp.Expression]] = [ 4636 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4637 ] 4638 if self._match(TokenType.COMMA): 4639 args.extend(self._parse_csv(self._parse_conjunction)) 4640 else: 4641 args = self._parse_csv(self._parse_conjunction) # type: ignore 4642 4643 index = self._index 4644 if not self._match(TokenType.R_PAREN) and args: 4645 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4646 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4647 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4648 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4649 4650 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4651 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4652 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4653 if not self._match_text_seq("WITHIN", "GROUP"): 4654 self._retreat(index) 4655 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4656 4657 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4658 order = self._parse_order(this=seq_get(args, 0)) 4659 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4660 4661 def _parse_convert( 4662 self, strict: bool, safe: t.Optional[bool] = None 4663 ) -> t.Optional[exp.Expression]: 4664 this = self._parse_bitwise() 4665 4666 if self._match(TokenType.USING): 4667 to: t.Optional[exp.Expression] = self.expression( 4668 exp.CharacterSet, this=self._parse_var() 4669 ) 4670 elif self._match(TokenType.COMMA): 4671 to = self._parse_types() 4672 else: 4673 to = None 4674 4675 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4676 4677 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4678 """ 4679 There are generally two variants of the DECODE function: 4680 4681 - DECODE(bin, charset) 4682 - DECODE(expression, search, result [, search, result] ... [, default]) 4683 4684 The second variant will always be parsed into a CASE expression. 
Note that NULL 4685 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4686 instead of relying on pattern matching. 4687 """ 4688 args = self._parse_csv(self._parse_conjunction) 4689 4690 if len(args) < 3: 4691 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4692 4693 expression, *expressions = args 4694 if not expression: 4695 return None 4696 4697 ifs = [] 4698 for search, result in zip(expressions[::2], expressions[1::2]): 4699 if not search or not result: 4700 return None 4701 4702 if isinstance(search, exp.Literal): 4703 ifs.append( 4704 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4705 ) 4706 elif isinstance(search, exp.Null): 4707 ifs.append( 4708 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4709 ) 4710 else: 4711 cond = exp.or_( 4712 exp.EQ(this=expression.copy(), expression=search), 4713 exp.and_( 4714 exp.Is(this=expression.copy(), expression=exp.Null()), 4715 exp.Is(this=search.copy(), expression=exp.Null()), 4716 copy=False, 4717 ), 4718 copy=False, 4719 ) 4720 ifs.append(exp.If(this=cond, true=result)) 4721 4722 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4723 4724 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4725 self._match_text_seq("KEY") 4726 key = self._parse_column() 4727 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 4728 self._match_text_seq("VALUE") 4729 value = self._parse_bitwise() 4730 4731 if not key and not value: 4732 return None 4733 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4734 4735 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4736 if not this or not self._match_text_seq("FORMAT", "JSON"): 4737 return this 4738 4739 return self.expression(exp.FormatJson, this=this) 4740 4741 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4742 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 4743 for value in values: 4744 if self._match_text_seq(value, "ON", on): 4745 return f"{value} ON {on}" 4746 4747 return None 4748 4749 @t.overload 4750 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: 4751 ... 4752 4753 @t.overload 4754 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: 4755 ... 
4756 4757 def _parse_json_object(self, agg=False): 4758 star = self._parse_star() 4759 expressions = ( 4760 [star] 4761 if star 4762 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4763 ) 4764 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4765 4766 unique_keys = None 4767 if self._match_text_seq("WITH", "UNIQUE"): 4768 unique_keys = True 4769 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4770 unique_keys = False 4771 4772 self._match_text_seq("KEYS") 4773 4774 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4775 self._parse_type() 4776 ) 4777 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4778 4779 return self.expression( 4780 exp.JSONObjectAgg if agg else exp.JSONObject, 4781 expressions=expressions, 4782 null_handling=null_handling, 4783 unique_keys=unique_keys, 4784 return_type=return_type, 4785 encoding=encoding, 4786 ) 4787 4788 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4789 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4790 if not self._match_text_seq("NESTED"): 4791 this = self._parse_id_var() 4792 kind = self._parse_types(allow_identifiers=False) 4793 nested = None 4794 else: 4795 this = None 4796 kind = None 4797 nested = True 4798 4799 path = self._match_text_seq("PATH") and self._parse_string() 4800 nested_schema = nested and self._parse_json_schema() 4801 4802 return self.expression( 4803 exp.JSONColumnDef, 4804 this=this, 4805 kind=kind, 4806 path=path, 4807 nested_schema=nested_schema, 4808 ) 4809 4810 def _parse_json_schema(self) -> exp.JSONSchema: 4811 self._match_text_seq("COLUMNS") 4812 return self.expression( 4813 exp.JSONSchema, 4814 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 4815 ) 4816 4817 def _parse_json_table(self) -> exp.JSONTable: 4818 this = self._parse_format_json(self._parse_bitwise()) 4819 path = self._match(TokenType.COMMA) and self._parse_string() 4820 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 4821 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 4822 schema = self._parse_json_schema() 4823 4824 return exp.JSONTable( 4825 this=this, 4826 schema=schema, 4827 path=path, 4828 error_handling=error_handling, 4829 empty_handling=empty_handling, 4830 ) 4831 4832 def _parse_match_against(self) -> exp.MatchAgainst: 4833 expressions = self._parse_csv(self._parse_column) 4834 4835 self._match_text_seq(")", "AGAINST", "(") 4836 4837 this = self._parse_string() 4838 4839 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4840 modifier = "IN NATURAL LANGUAGE MODE" 4841 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4842 modifier = f"{modifier} WITH QUERY EXPANSION" 4843 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4844 modifier = "IN BOOLEAN MODE" 4845 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4846 modifier = "WITH QUERY EXPANSION" 4847 else: 4848 modifier = None 4849 4850 return self.expression( 4851 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4852 ) 4853 4854 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4855 def _parse_open_json(self) -> exp.OpenJSON: 4856 this = self._parse_bitwise() 4857 path = self._match(TokenType.COMMA) and self._parse_string() 4858 4859 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4860 this = self._parse_field(any_token=True) 4861 kind = self._parse_types() 4862 path = 
self._parse_string() 4863 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4864 4865 return self.expression( 4866 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4867 ) 4868 4869 expressions = None 4870 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4871 self._match_l_paren() 4872 expressions = self._parse_csv(_parse_open_json_column_def) 4873 4874 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4875 4876 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4877 args = self._parse_csv(self._parse_bitwise) 4878 4879 if self._match(TokenType.IN): 4880 return self.expression( 4881 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4882 ) 4883 4884 if haystack_first: 4885 haystack = seq_get(args, 0) 4886 needle = seq_get(args, 1) 4887 else: 4888 needle = seq_get(args, 0) 4889 haystack = seq_get(args, 1) 4890 4891 return self.expression( 4892 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4893 ) 4894 4895 def _parse_predict(self) -> exp.Predict: 4896 self._match_text_seq("MODEL") 4897 this = self._parse_table() 4898 4899 self._match(TokenType.COMMA) 4900 self._match_text_seq("TABLE") 4901 4902 return self.expression( 4903 exp.Predict, 4904 this=this, 4905 expression=self._parse_table(), 4906 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 4907 ) 4908 4909 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4910 args = self._parse_csv(self._parse_table) 4911 return exp.JoinHint(this=func_name.upper(), expressions=args) 4912 4913 def _parse_substring(self) -> exp.Substring: 4914 # Postgres supports the form: substring(string [from int] [for int]) 4915 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4916 4917 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4918 4919 if self._match(TokenType.FROM): 4920 args.append(self._parse_bitwise()) 4921 if self._match(TokenType.FOR): 4922 args.append(self._parse_bitwise()) 4923 4924 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4925 4926 def _parse_trim(self) -> exp.Trim: 4927 # https://www.w3resource.com/sql/character-functions/trim.php 4928 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4929 4930 position = None 4931 collation = None 4932 expression = None 4933 4934 if self._match_texts(self.TRIM_TYPES): 4935 position = self._prev.text.upper() 4936 4937 this = self._parse_bitwise() 4938 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4939 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 4940 expression = self._parse_bitwise() 4941 4942 if invert_order: 4943 this, expression = expression, this 4944 4945 if self._match(TokenType.COLLATE): 4946 collation = self._parse_bitwise() 4947 4948 return self.expression( 4949 exp.Trim, this=this, position=position, expression=expression, collation=collation 4950 ) 4951 4952 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4953 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4954 4955 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4956 return self._parse_window(self._parse_id_var(), alias=True) 4957 4958 def _parse_respect_or_ignore_nulls( 4959 self, this: t.Optional[exp.Expression] 4960 ) -> t.Optional[exp.Expression]: 4961 if self._match_text_seq("IGNORE", "NULLS"): 4962 return self.expression(exp.IgnoreNulls, this=this) 4963 if 
self._match_text_seq("RESPECT", "NULLS"): 4964 return self.expression(exp.RespectNulls, this=this) 4965 return this 4966 4967 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4968 if self._match(TokenType.HAVING): 4969 self._match_texts(("MAX", "MIN")) 4970 max = self._prev.text.upper() != "MIN" 4971 return self.expression( 4972 exp.HavingMax, this=this, expression=self._parse_column(), max=max 4973 ) 4974 4975 return this 4976 4977 def _parse_window( 4978 self, this: t.Optional[exp.Expression], alias: bool = False 4979 ) -> t.Optional[exp.Expression]: 4980 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4981 self._match(TokenType.WHERE) 4982 this = self.expression( 4983 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4984 ) 4985 self._match_r_paren() 4986 4987 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4988 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4989 if self._match_text_seq("WITHIN", "GROUP"): 4990 order = self._parse_wrapped(self._parse_order) 4991 this = self.expression(exp.WithinGroup, this=this, expression=order) 4992 4993 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4994 # Some dialects choose to implement and some do not. 4995 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4996 4997 # There is some code above in _parse_lambda that handles 4998 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4999 5000 # The below changes handle 5001 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5002 5003 # Oracle allows both formats 5004 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5005 # and Snowflake chose to do the same for familiarity 5006 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5007 if isinstance(this, exp.AggFunc): 5008 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5009 5010 if ignore_respect and ignore_respect is not this: 5011 ignore_respect.replace(ignore_respect.this) 5012 this = self.expression(ignore_respect.__class__, this=this) 5013 5014 this = self._parse_respect_or_ignore_nulls(this) 5015 5016 # bigquery select from window x AS (partition by ...) 
5017 if alias: 5018 over = None 5019 self._match(TokenType.ALIAS) 5020 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5021 return this 5022 else: 5023 over = self._prev.text.upper() 5024 5025 if not self._match(TokenType.L_PAREN): 5026 return self.expression( 5027 exp.Window, this=this, alias=self._parse_id_var(False), over=over 5028 ) 5029 5030 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5031 5032 first = self._match(TokenType.FIRST) 5033 if self._match_text_seq("LAST"): 5034 first = False 5035 5036 partition, order = self._parse_partition_and_order() 5037 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5038 5039 if kind: 5040 self._match(TokenType.BETWEEN) 5041 start = self._parse_window_spec() 5042 self._match(TokenType.AND) 5043 end = self._parse_window_spec() 5044 5045 spec = self.expression( 5046 exp.WindowSpec, 5047 kind=kind, 5048 start=start["value"], 5049 start_side=start["side"], 5050 end=end["value"], 5051 end_side=end["side"], 5052 ) 5053 else: 5054 spec = None 5055 5056 self._match_r_paren() 5057 5058 window = self.expression( 5059 exp.Window, 5060 this=this, 5061 partition_by=partition, 5062 order=order, 5063 spec=spec, 5064 alias=window_alias, 5065 over=over, 5066 first=first, 5067 ) 5068 5069 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5070 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5071 return self._parse_window(window, alias=alias) 5072 5073 return window 5074 5075 def _parse_partition_and_order( 5076 self, 5077 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5078 return self._parse_partition_by(), self._parse_order() 5079 5080 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5081 self._match(TokenType.BETWEEN) 5082 5083 return { 5084 "value": ( 5085 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5086 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5087 or self._parse_bitwise() 5088 ), 5089 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5090 } 5091 5092 def _parse_alias( 5093 self, this: t.Optional[exp.Expression], explicit: bool = False 5094 ) -> t.Optional[exp.Expression]: 5095 any_token = self._match(TokenType.ALIAS) 5096 comments = self._prev_comments 5097 5098 if explicit and not any_token: 5099 return this 5100 5101 if self._match(TokenType.L_PAREN): 5102 aliases = self.expression( 5103 exp.Aliases, 5104 comments=comments, 5105 this=this, 5106 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5107 ) 5108 self._match_r_paren(aliases) 5109 return aliases 5110 5111 alias = self._parse_id_var(any_token) or ( 5112 self.STRING_ALIASES and self._parse_string_as_identifier() 5113 ) 5114 5115 if alias: 5116 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5117 column = this.this 5118 5119 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5120 if not this.comments and column and column.comments: 5121 this.comments = column.comments 5122 column.comments = None 5123 5124 return this 5125 5126 def _parse_id_var( 5127 self, 5128 any_token: bool = True, 5129 tokens: t.Optional[t.Collection[TokenType]] = None, 5130 ) -> t.Optional[exp.Expression]: 5131 identifier = self._parse_identifier() 5132 5133 if identifier: 5134 return identifier 5135 5136 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 5137 quoted = self._prev.token_type == TokenType.STRING 5138 
return exp.Identifier(this=self._prev.text, quoted=quoted) 5139 5140 return None 5141 5142 def _parse_string(self) -> t.Optional[exp.Expression]: 5143 if self._match_set((TokenType.STRING, TokenType.RAW_STRING)): 5144 return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev) 5145 return self._parse_placeholder() 5146 5147 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5148 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5149 5150 def _parse_number(self) -> t.Optional[exp.Expression]: 5151 if self._match(TokenType.NUMBER): 5152 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 5153 return self._parse_placeholder() 5154 5155 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5156 if self._match(TokenType.IDENTIFIER): 5157 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5158 return self._parse_placeholder() 5159 5160 def _parse_var( 5161 self, 5162 any_token: bool = False, 5163 tokens: t.Optional[t.Collection[TokenType]] = None, 5164 upper: bool = False, 5165 ) -> t.Optional[exp.Expression]: 5166 if ( 5167 (any_token and self._advance_any()) 5168 or self._match(TokenType.VAR) 5169 or (self._match_set(tokens) if tokens else False) 5170 ): 5171 return self.expression( 5172 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5173 ) 5174 return self._parse_placeholder() 5175 5176 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5177 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5178 self._advance() 5179 return self._prev 5180 return None 5181 5182 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5183 return self._parse_var() or self._parse_string() 5184 5185 def _parse_null(self) -> t.Optional[exp.Expression]: 5186 if self._match_set(self.NULL_TOKENS): 5187 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5188 return self._parse_placeholder() 5189 5190 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5191 if self._match(TokenType.TRUE): 5192 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5193 if self._match(TokenType.FALSE): 5194 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5195 return self._parse_placeholder() 5196 5197 def _parse_star(self) -> t.Optional[exp.Expression]: 5198 if self._match(TokenType.STAR): 5199 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5200 return self._parse_placeholder() 5201 5202 def _parse_parameter(self) -> exp.Parameter: 5203 def _parse_parameter_part() -> t.Optional[exp.Expression]: 5204 return ( 5205 self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True) 5206 ) 5207 5208 self._match(TokenType.L_BRACE) 5209 this = _parse_parameter_part() 5210 expression = self._match(TokenType.COLON) and _parse_parameter_part() 5211 self._match(TokenType.R_BRACE) 5212 5213 return self.expression(exp.Parameter, this=this, expression=expression) 5214 5215 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5216 if self._match_set(self.PLACEHOLDER_PARSERS): 5217 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5218 if placeholder: 5219 return placeholder 5220 self._advance(-1) 5221 return None 5222 5223 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5224 if not self._match(TokenType.EXCEPT): 5225 return None 5226 if self._match(TokenType.L_PAREN, advance=False): 5227 return self._parse_wrapped_csv(self._parse_column) 5228 5229 
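# Added note (not in the original source): the parenthesized branch above
# covers BigQuery-style star exclusion, while a bare trailing column falls
# through to here. Hedged example of the wrapped form:
#
#     import sqlglot
#
#     q = "SELECT * EXCEPT (a, b) FROM t"
#     print(sqlglot.parse_one(q, read="bigquery").sql("bigquery"))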
except_column = self._parse_column() 5230 return [except_column] if except_column else None 5231 5232 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5233 if not self._match(TokenType.REPLACE): 5234 return None 5235 if self._match(TokenType.L_PAREN, advance=False): 5236 return self._parse_wrapped_csv(self._parse_expression) 5237 5238 replace_expression = self._parse_expression() 5239 return [replace_expression] if replace_expression else None 5240 5241 def _parse_csv( 5242 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5243 ) -> t.List[exp.Expression]: 5244 parse_result = parse_method() 5245 items = [parse_result] if parse_result is not None else [] 5246 5247 while self._match(sep): 5248 self._add_comments(parse_result) 5249 parse_result = parse_method() 5250 if parse_result is not None: 5251 items.append(parse_result) 5252 5253 return items 5254 5255 def _parse_tokens( 5256 self, parse_method: t.Callable, expressions: t.Dict 5257 ) -> t.Optional[exp.Expression]: 5258 this = parse_method() 5259 5260 while self._match_set(expressions): 5261 this = self.expression( 5262 expressions[self._prev.token_type], 5263 this=this, 5264 comments=self._prev_comments, 5265 expression=parse_method(), 5266 ) 5267 5268 return this 5269 5270 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5271 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5272 5273 def _parse_wrapped_csv( 5274 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5275 ) -> t.List[exp.Expression]: 5276 return self._parse_wrapped( 5277 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5278 ) 5279 5280 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5281 wrapped = self._match(TokenType.L_PAREN) 5282 if not wrapped and not optional: 5283 self.raise_error("Expecting (") 5284 parse_result = parse_method() 5285 if wrapped: 5286 self._match_r_paren() 5287 return parse_result 5288 5289 def _parse_expressions(self) -> t.List[exp.Expression]: 5290 return self._parse_csv(self._parse_expression) 5291 5292 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5293 return self._parse_select() or self._parse_set_operations( 5294 self._parse_expression() if alias else self._parse_conjunction() 5295 ) 5296 5297 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5298 return self._parse_query_modifiers( 5299 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5300 ) 5301 5302 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5303 this = None 5304 if self._match_texts(self.TRANSACTION_KIND): 5305 this = self._prev.text 5306 5307 self._match_texts(("TRANSACTION", "WORK")) 5308 5309 modes = [] 5310 while True: 5311 mode = [] 5312 while self._match(TokenType.VAR): 5313 mode.append(self._prev.text) 5314 5315 if mode: 5316 modes.append(" ".join(mode)) 5317 if not self._match(TokenType.COMMA): 5318 break 5319 5320 return self.expression(exp.Transaction, this=this, modes=modes) 5321 5322 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5323 chain = None 5324 savepoint = None 5325 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5326 5327 self._match_texts(("TRANSACTION", "WORK")) 5328 5329 if self._match_text_seq("TO"): 5330 self._match_text_seq("SAVEPOINT") 5331 savepoint = self._parse_id_var() 5332 5333 if self._match(TokenType.AND): 5334 chain = not 
self._match_text_seq("NO") 5335 self._match_text_seq("CHAIN") 5336 5337 if is_rollback: 5338 return self.expression(exp.Rollback, savepoint=savepoint) 5339 5340 return self.expression(exp.Commit, chain=chain) 5341 5342 def _parse_refresh(self) -> exp.Refresh: 5343 self._match(TokenType.TABLE) 5344 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5345 5346 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5347 if not self._match_text_seq("ADD"): 5348 return None 5349 5350 self._match(TokenType.COLUMN) 5351 exists_column = self._parse_exists(not_=True) 5352 expression = self._parse_field_def() 5353 5354 if expression: 5355 expression.set("exists", exists_column) 5356 5357 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5358 if self._match_texts(("FIRST", "AFTER")): 5359 position = self._prev.text 5360 column_position = self.expression( 5361 exp.ColumnPosition, this=self._parse_column(), position=position 5362 ) 5363 expression.set("position", column_position) 5364 5365 return expression 5366 5367 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5368 drop = self._match(TokenType.DROP) and self._parse_drop() 5369 if drop and not isinstance(drop, exp.Command): 5370 drop.set("kind", drop.args.get("kind", "COLUMN")) 5371 return drop 5372 5373 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5374 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5375 return self.expression( 5376 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5377 ) 5378 5379 def _parse_add_constraint(self) -> exp.AddConstraint: 5380 this = None 5381 kind = self._prev.token_type 5382 5383 if kind == TokenType.CONSTRAINT: 5384 this = self._parse_id_var() 5385 5386 if self._match_text_seq("CHECK"): 5387 expression = self._parse_wrapped(self._parse_conjunction) 5388 enforced = self._match_text_seq("ENFORCED") or False 5389 5390 return self.expression( 5391 exp.AddConstraint, this=this, expression=expression, enforced=enforced 5392 ) 5393 5394 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 5395 expression = self._parse_foreign_key() 5396 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 5397 expression = self._parse_primary_key() 5398 else: 5399 expression = None 5400 5401 return self.expression(exp.AddConstraint, this=this, expression=expression) 5402 5403 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5404 index = self._index - 1 5405 5406 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 5407 return self._parse_csv(self._parse_add_constraint) 5408 5409 self._retreat(index) 5410 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5411 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5412 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5413 5414 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5415 self._match(TokenType.COLUMN) 5416 column = self._parse_field(any_token=True) 5417 5418 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5419 return self.expression(exp.AlterColumn, this=column, drop=True) 5420 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5421 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5422 if self._match(TokenType.COMMENT): 5423 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 
5424 5425 self._match_text_seq("SET", "DATA") 5426 return self.expression( 5427 exp.AlterColumn, 5428 this=column, 5429 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5430 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5431 using=self._match(TokenType.USING) and self._parse_conjunction(), 5432 ) 5433 5434 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5435 index = self._index - 1 5436 5437 partition_exists = self._parse_exists() 5438 if self._match(TokenType.PARTITION, advance=False): 5439 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5440 5441 self._retreat(index) 5442 return self._parse_csv(self._parse_drop_column) 5443 5444 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5445 if self._match(TokenType.COLUMN): 5446 exists = self._parse_exists() 5447 old_column = self._parse_column() 5448 to = self._match_text_seq("TO") 5449 new_column = self._parse_column() 5450 5451 if old_column is None or to is None or new_column is None: 5452 return None 5453 5454 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5455 5456 self._match_text_seq("TO") 5457 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5458 5459 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5460 start = self._prev 5461 5462 if not self._match(TokenType.TABLE): 5463 return self._parse_as_command(start) 5464 5465 exists = self._parse_exists() 5466 only = self._match_text_seq("ONLY") 5467 this = self._parse_table(schema=True) 5468 5469 if self._next: 5470 self._advance() 5471 5472 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5473 if parser: 5474 actions = ensure_list(parser(self)) 5475 5476 if not self._curr and actions: 5477 return self.expression( 5478 exp.AlterTable, 5479 this=this, 5480 exists=exists, 5481 actions=actions, 5482 only=only, 5483 ) 5484 5485 return self._parse_as_command(start) 5486 5487 def _parse_merge(self) -> exp.Merge: 5488 self._match(TokenType.INTO) 5489 target = self._parse_table() 5490 5491 if target and self._match(TokenType.ALIAS, advance=False): 5492 target.set("alias", self._parse_table_alias()) 5493 5494 self._match(TokenType.USING) 5495 using = self._parse_table() 5496 5497 self._match(TokenType.ON) 5498 on = self._parse_conjunction() 5499 5500 return self.expression( 5501 exp.Merge, 5502 this=target, 5503 using=using, 5504 on=on, 5505 expressions=self._parse_when_matched(), 5506 ) 5507 5508 def _parse_when_matched(self) -> t.List[exp.When]: 5509 whens = [] 5510 5511 while self._match(TokenType.WHEN): 5512 matched = not self._match(TokenType.NOT) 5513 self._match_text_seq("MATCHED") 5514 source = ( 5515 False 5516 if self._match_text_seq("BY", "TARGET") 5517 else self._match_text_seq("BY", "SOURCE") 5518 ) 5519 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5520 5521 self._match(TokenType.THEN) 5522 5523 if self._match(TokenType.INSERT): 5524 _this = self._parse_star() 5525 if _this: 5526 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5527 else: 5528 then = self.expression( 5529 exp.Insert, 5530 this=self._parse_value(), 5531 expression=self._match_text_seq("VALUES") and self._parse_value(), 5532 ) 5533 elif self._match(TokenType.UPDATE): 5534 expressions = self._parse_star() 5535 if expressions: 5536 then = self.expression(exp.Update, expressions=expressions) 5537 else: 5538 then = self.expression( 5539 exp.Update, 
5540 expressions=self._match(TokenType.SET) 5541 and self._parse_csv(self._parse_equality), 5542 ) 5543 elif self._match(TokenType.DELETE): 5544 then = self.expression(exp.Var, this=self._prev.text) 5545 else: 5546 then = None 5547 5548 whens.append( 5549 self.expression( 5550 exp.When, 5551 matched=matched, 5552 source=source, 5553 condition=condition, 5554 then=then, 5555 ) 5556 ) 5557 return whens 5558 5559 def _parse_show(self) -> t.Optional[exp.Expression]: 5560 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5561 if parser: 5562 return parser(self) 5563 return self._parse_as_command(self._prev) 5564 5565 def _parse_set_item_assignment( 5566 self, kind: t.Optional[str] = None 5567 ) -> t.Optional[exp.Expression]: 5568 index = self._index 5569 5570 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5571 return self._parse_set_transaction(global_=kind == "GLOBAL") 5572 5573 left = self._parse_primary() or self._parse_id_var() 5574 assignment_delimiter = self._match_texts(("=", "TO")) 5575 5576 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5577 self._retreat(index) 5578 return None 5579 5580 right = self._parse_statement() or self._parse_id_var() 5581 this = self.expression(exp.EQ, this=left, expression=right) 5582 5583 return self.expression(exp.SetItem, this=this, kind=kind) 5584 5585 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5586 self._match_text_seq("TRANSACTION") 5587 characteristics = self._parse_csv( 5588 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5589 ) 5590 return self.expression( 5591 exp.SetItem, 5592 expressions=characteristics, 5593 kind="TRANSACTION", 5594 **{"global": global_}, # type: ignore 5595 ) 5596 5597 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5598 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5599 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5600 5601 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5602 index = self._index 5603 set_ = self.expression( 5604 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5605 ) 5606 5607 if self._curr: 5608 self._retreat(index) 5609 return self._parse_as_command(self._prev) 5610 5611 return set_ 5612 5613 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5614 for option in options: 5615 if self._match_text_seq(*option.split(" ")): 5616 return exp.var(option) 5617 return None 5618 5619 def _parse_as_command(self, start: Token) -> exp.Command: 5620 while self._curr: 5621 self._advance() 5622 text = self._find_sql(start, self._prev) 5623 size = len(start.text) 5624 self._warn_unsupported() 5625 return exp.Command(this=text[:size], expression=text[size:]) 5626 5627 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5628 settings = [] 5629 5630 self._match_l_paren() 5631 kind = self._parse_id_var() 5632 5633 if self._match(TokenType.L_PAREN): 5634 while True: 5635 key = self._parse_id_var() 5636 value = self._parse_primary() 5637 5638 if not key and value is None: 5639 break 5640 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5641 self._match(TokenType.R_PAREN) 5642 5643 self._match_r_paren() 5644 5645 return self.expression( 5646 exp.DictProperty, 5647 this=this, 5648 kind=kind.this if kind else None, 5649 settings=settings, 5650 ) 5651 5652 def _parse_dict_range(self, this: str) -> 
exp.DictRange: 5653 self._match_l_paren() 5654 has_min = self._match_text_seq("MIN") 5655 if has_min: 5656 min = self._parse_var() or self._parse_primary() 5657 self._match_text_seq("MAX") 5658 max = self._parse_var() or self._parse_primary() 5659 else: 5660 max = self._parse_var() or self._parse_primary() 5661 min = exp.Literal.number(0) 5662 self._match_r_paren() 5663 return self.expression(exp.DictRange, this=this, min=min, max=max) 5664 5665 def _parse_comprehension( 5666 self, this: t.Optional[exp.Expression] 5667 ) -> t.Optional[exp.Comprehension]: 5668 index = self._index 5669 expression = self._parse_column() 5670 if not self._match(TokenType.IN): 5671 self._retreat(index - 1) 5672 return None 5673 iterator = self._parse_column() 5674 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5675 return self.expression( 5676 exp.Comprehension, 5677 this=this, 5678 expression=expression, 5679 iterator=iterator, 5680 condition=condition, 5681 ) 5682 5683 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 5684 if self._match(TokenType.HEREDOC_STRING): 5685 return self.expression(exp.Heredoc, this=self._prev.text) 5686 5687 if not self._match_text_seq("$"): 5688 return None 5689 5690 tags = ["$"] 5691 tag_text = None 5692 5693 if self._is_connected(): 5694 self._advance() 5695 tags.append(self._prev.text.upper()) 5696 else: 5697 self.raise_error("No closing $ found") 5698 5699 if tags[-1] != "$": 5700 if self._is_connected() and self._match_text_seq("$"): 5701 tag_text = tags[-1] 5702 tags.append("$") 5703 else: 5704 self.raise_error("No closing $ found") 5705 5706 heredoc_start = self._curr 5707 5708 while self._curr: 5709 if self._match_text_seq(*tags, advance=False): 5710 this = self._find_sql(heredoc_start, self._prev) 5711 self._advance(len(tags)) 5712 return self.expression(exp.Heredoc, this=this, tag=tag_text) 5713 5714 self._advance() 5715 5716 self.raise_error(f"No closing {''.join(tags)} found") 5717 return None 5718 5719 def _find_parser( 5720 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5721 ) -> t.Optional[t.Callable]: 5722 if not self._curr: 5723 return None 5724 5725 index = self._index 5726 this = [] 5727 while True: 5728 # The current token might be multiple words 5729 curr = self._curr.text.upper() 5730 key = curr.split(" ") 5731 this.append(curr) 5732 5733 self._advance() 5734 result, trie = in_trie(trie, key) 5735 if result == TrieResult.FAILED: 5736 break 5737 5738 if result == TrieResult.EXISTS: 5739 subparser = parsers[" ".join(this)] 5740 return subparser 5741 5742 self._retreat(index) 5743 return None 5744 5745 def _match(self, token_type, advance=True, expression=None): 5746 if not self._curr: 5747 return None 5748 5749 if self._curr.token_type == token_type: 5750 if advance: 5751 self._advance() 5752 self._add_comments(expression) 5753 return True 5754 5755 return None 5756 5757 def _match_set(self, types, advance=True): 5758 if not self._curr: 5759 return None 5760 5761 if self._curr.token_type in types: 5762 if advance: 5763 self._advance() 5764 return True 5765 5766 return None 5767 5768 def _match_pair(self, token_type_a, token_type_b, advance=True): 5769 if not self._curr or not self._next: 5770 return None 5771 5772 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5773 if advance: 5774 self._advance(2) 5775 return True 5776 5777 return None 5778 5779 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5780 if not self._match(TokenType.L_PAREN, 
expression=expression): 5781 self.raise_error("Expecting (") 5782 5783 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5784 if not self._match(TokenType.R_PAREN, expression=expression): 5785 self.raise_error("Expecting )") 5786 5787 def _match_texts(self, texts, advance=True): 5788 if self._curr and self._curr.text.upper() in texts: 5789 if advance: 5790 self._advance() 5791 return True 5792 return None 5793 5794 def _match_text_seq(self, *texts, advance=True): 5795 index = self._index 5796 for text in texts: 5797 if self._curr and self._curr.text.upper() == text: 5798 self._advance() 5799 else: 5800 self._retreat(index) 5801 return None 5802 5803 if not advance: 5804 self._retreat(index) 5805 5806 return True 5807 5808 @t.overload 5809 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5810 ... 5811 5812 @t.overload 5813 def _replace_columns_with_dots( 5814 self, this: t.Optional[exp.Expression] 5815 ) -> t.Optional[exp.Expression]: 5816 ... 5817 5818 def _replace_columns_with_dots(self, this): 5819 if isinstance(this, exp.Dot): 5820 exp.replace_children(this, self._replace_columns_with_dots) 5821 elif isinstance(this, exp.Column): 5822 exp.replace_children(this, self._replace_columns_with_dots) 5823 table = this.args.get("table") 5824 this = ( 5825 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5826 ) 5827 5828 return this 5829 5830 def _replace_lambda( 5831 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5832 ) -> t.Optional[exp.Expression]: 5833 if not node: 5834 return node 5835 5836 for column in node.find_all(exp.Column): 5837 if column.parts[0].name in lambda_variables: 5838 dot_or_id = column.to_dot() if column.table else column.this 5839 parent = column.parent 5840 5841 while isinstance(parent, exp.Dot): 5842 if not isinstance(parent.parent, exp.Dot): 5843 parent.replace(dot_or_id) 5844 break 5845 parent = parent.parent 5846 else: 5847 if column is node: 5848 node = dot_or_id 5849 else: 5850 column.replace(dot_or_id) 5851 return node
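A minimal sketch of the window machinery above: _parse_window and _parse_window_spec fill an exp.Window whose WindowSpec records the frame kind, the boundary values, and their sides. Driving it through the public sqlglot.parse_one entry point (default dialect assumed):

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one(
    "SELECT SUM(x) OVER (PARTITION BY y ORDER BY z "
    "ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM t"
)

window = ast.find(exp.Window)
spec = window.args["spec"]  # the exp.WindowSpec built from the frame clause

print(spec.args["kind"])        # ROWS
print(spec.args["start"])       # UNBOUNDED
print(spec.args["start_side"])  # PRECEDING
print(spec.args["end"])         # CURRENT ROW

Calling ast.sql() serializes the same tree back to SQL.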
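build_var_map, defined earlier in the module source, backs the VAR_MAP entry of Parser.FUNCTIONS: alternating arguments become parallel key and value arrays, and a lone star becomes exp.StarMap. A short sketch against the default dialect:

import sqlglot
from sqlglot import exp

var_map = sqlglot.parse_one("SELECT VAR_MAP('a', 1, 'b', 2)").expressions[0]
print(type(var_map).__name__)              # VarMap
print(var_map.args["keys"].expressions)    # the key literals 'a' and 'b'
print(var_map.args["values"].expressions)  # the value literals 1 and 2

star_map = sqlglot.parse_one("SELECT VAR_MAP(*)").expressions[0]
print(type(star_map).__name__)             # StarMap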
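build_logarithm, also shown in the module source, resolves LOG according to the dialect: with two arguments the default order is (base, expression), swapped when LOG_BASE_FIRST is False, and a lone argument parses to exp.Ln when the dialect's parser sets LOG_DEFAULTS_TO_LN. A sketch, assuming MySQL is one of the LOG_DEFAULTS_TO_LN dialects (its single-argument LOG is a natural logarithm):

import sqlglot

log = sqlglot.parse_one("SELECT LOG(10, x)").expressions[0]
print(log.this.sql(), log.expression.sql())  # 10 x

ln = sqlglot.parse_one("SELECT LOG(x)", read="mysql").expressions[0]
print(type(ln).__name__)                     # Ln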
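build_extract_json_with_path wires up the JSON extraction functions: the path argument is normalized through dialect.to_json_path, and JSON_EXTRACT keeps any extra arguments as additional expressions. A sketch using the MySQL dialect:

import sqlglot

node = sqlglot.parse_one("SELECT JSON_EXTRACT(doc, '$.a.b')", read="mysql").expressions[0]
print(type(node).__name__)  # JSONExtract
print(node.this.sql())      # doc
print(node.expression)      # the '$.a.b' path, as normalized by dialect.to_json_path

print(node.sql("mysql"))    # should render back as JSON_EXTRACT(doc, '$.a.b')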
84class Parser(metaclass=_Parser): 85 """ 86 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 87 88 Args: 89 error_level: The desired error level. 90 Default: ErrorLevel.IMMEDIATE 91 error_message_context: The amount of context to capture from a query string when displaying 92 the error message (in number of characters). 93 Default: 100 94 max_errors: Maximum number of error messages to include in a raised ParseError. 95 This is only relevant if error_level is ErrorLevel.RAISE. 96 Default: 3 97 """ 98 99 FUNCTIONS: t.Dict[str, t.Callable] = { 100 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 101 "CONCAT": lambda args, dialect: exp.Concat( 102 expressions=args, 103 safe=not dialect.STRICT_STRING_CONCAT, 104 coalesce=dialect.CONCAT_COALESCE, 105 ), 106 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 107 expressions=args, 108 safe=not dialect.STRICT_STRING_CONCAT, 109 coalesce=dialect.CONCAT_COALESCE, 110 ), 111 "DATE_TO_DATE_STR": lambda args: exp.Cast( 112 this=seq_get(args, 0), 113 to=exp.DataType(this=exp.DataType.Type.TEXT), 114 ), 115 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 116 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 117 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 118 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 119 "LIKE": build_like, 120 "LOG": build_logarithm, 121 "TIME_TO_TIME_STR": lambda args: exp.Cast( 122 this=seq_get(args, 0), 123 to=exp.DataType(this=exp.DataType.Type.TEXT), 124 ), 125 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 126 this=exp.Cast( 127 this=seq_get(args, 0), 128 to=exp.DataType(this=exp.DataType.Type.TEXT), 129 ), 130 start=exp.Literal.number(1), 131 length=exp.Literal.number(10), 132 ), 133 "VAR_MAP": build_var_map, 134 } 135 136 NO_PAREN_FUNCTIONS = { 137 TokenType.CURRENT_DATE: exp.CurrentDate, 138 TokenType.CURRENT_DATETIME: exp.CurrentDate, 139 TokenType.CURRENT_TIME: exp.CurrentTime, 140 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 141 TokenType.CURRENT_USER: exp.CurrentUser, 142 } 143 144 STRUCT_TYPE_TOKENS = { 145 TokenType.NESTED, 146 TokenType.STRUCT, 147 } 148 149 NESTED_TYPE_TOKENS = { 150 TokenType.ARRAY, 151 TokenType.LOWCARDINALITY, 152 TokenType.MAP, 153 TokenType.NULLABLE, 154 *STRUCT_TYPE_TOKENS, 155 } 156 157 ENUM_TYPE_TOKENS = { 158 TokenType.ENUM, 159 TokenType.ENUM8, 160 TokenType.ENUM16, 161 } 162 163 AGGREGATE_TYPE_TOKENS = { 164 TokenType.AGGREGATEFUNCTION, 165 TokenType.SIMPLEAGGREGATEFUNCTION, 166 } 167 168 TYPE_TOKENS = { 169 TokenType.BIT, 170 TokenType.BOOLEAN, 171 TokenType.TINYINT, 172 TokenType.UTINYINT, 173 TokenType.SMALLINT, 174 TokenType.USMALLINT, 175 TokenType.INT, 176 TokenType.UINT, 177 TokenType.BIGINT, 178 TokenType.UBIGINT, 179 TokenType.INT128, 180 TokenType.UINT128, 181 TokenType.INT256, 182 TokenType.UINT256, 183 TokenType.MEDIUMINT, 184 TokenType.UMEDIUMINT, 185 TokenType.FIXEDSTRING, 186 TokenType.FLOAT, 187 TokenType.DOUBLE, 188 TokenType.CHAR, 189 TokenType.NCHAR, 190 TokenType.VARCHAR, 191 TokenType.NVARCHAR, 192 TokenType.BPCHAR, 193 TokenType.TEXT, 194 TokenType.MEDIUMTEXT, 195 TokenType.LONGTEXT, 196 TokenType.MEDIUMBLOB, 197 TokenType.LONGBLOB, 198 TokenType.BINARY, 199 TokenType.VARBINARY, 200 TokenType.JSON, 201 TokenType.JSONB, 202 TokenType.INTERVAL, 203 TokenType.TINYBLOB, 204 TokenType.TINYTEXT, 205 TokenType.TIME, 206 TokenType.TIMETZ, 207 TokenType.TIMESTAMP, 208 
TokenType.TIMESTAMP_S, 209 TokenType.TIMESTAMP_MS, 210 TokenType.TIMESTAMP_NS, 211 TokenType.TIMESTAMPTZ, 212 TokenType.TIMESTAMPLTZ, 213 TokenType.DATETIME, 214 TokenType.DATETIME64, 215 TokenType.DATE, 216 TokenType.DATE32, 217 TokenType.INT4RANGE, 218 TokenType.INT4MULTIRANGE, 219 TokenType.INT8RANGE, 220 TokenType.INT8MULTIRANGE, 221 TokenType.NUMRANGE, 222 TokenType.NUMMULTIRANGE, 223 TokenType.TSRANGE, 224 TokenType.TSMULTIRANGE, 225 TokenType.TSTZRANGE, 226 TokenType.TSTZMULTIRANGE, 227 TokenType.DATERANGE, 228 TokenType.DATEMULTIRANGE, 229 TokenType.DECIMAL, 230 TokenType.UDECIMAL, 231 TokenType.BIGDECIMAL, 232 TokenType.UUID, 233 TokenType.GEOGRAPHY, 234 TokenType.GEOMETRY, 235 TokenType.HLLSKETCH, 236 TokenType.HSTORE, 237 TokenType.PSEUDO_TYPE, 238 TokenType.SUPER, 239 TokenType.SERIAL, 240 TokenType.SMALLSERIAL, 241 TokenType.BIGSERIAL, 242 TokenType.XML, 243 TokenType.YEAR, 244 TokenType.UNIQUEIDENTIFIER, 245 TokenType.USERDEFINED, 246 TokenType.MONEY, 247 TokenType.SMALLMONEY, 248 TokenType.ROWVERSION, 249 TokenType.IMAGE, 250 TokenType.VARIANT, 251 TokenType.OBJECT, 252 TokenType.OBJECT_IDENTIFIER, 253 TokenType.INET, 254 TokenType.IPADDRESS, 255 TokenType.IPPREFIX, 256 TokenType.IPV4, 257 TokenType.IPV6, 258 TokenType.UNKNOWN, 259 TokenType.NULL, 260 *ENUM_TYPE_TOKENS, 261 *NESTED_TYPE_TOKENS, 262 *AGGREGATE_TYPE_TOKENS, 263 } 264 265 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 266 TokenType.BIGINT: TokenType.UBIGINT, 267 TokenType.INT: TokenType.UINT, 268 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 269 TokenType.SMALLINT: TokenType.USMALLINT, 270 TokenType.TINYINT: TokenType.UTINYINT, 271 TokenType.DECIMAL: TokenType.UDECIMAL, 272 } 273 274 SUBQUERY_PREDICATES = { 275 TokenType.ANY: exp.Any, 276 TokenType.ALL: exp.All, 277 TokenType.EXISTS: exp.Exists, 278 TokenType.SOME: exp.Any, 279 } 280 281 RESERVED_TOKENS = { 282 *Tokenizer.SINGLE_TOKENS.values(), 283 TokenType.SELECT, 284 } 285 286 DB_CREATABLES = { 287 TokenType.DATABASE, 288 TokenType.SCHEMA, 289 TokenType.TABLE, 290 TokenType.VIEW, 291 TokenType.MODEL, 292 TokenType.DICTIONARY, 293 TokenType.STORAGE_INTEGRATION, 294 } 295 296 CREATABLES = { 297 TokenType.COLUMN, 298 TokenType.CONSTRAINT, 299 TokenType.FUNCTION, 300 TokenType.INDEX, 301 TokenType.PROCEDURE, 302 TokenType.FOREIGN_KEY, 303 *DB_CREATABLES, 304 } 305 306 # Tokens that can represent identifiers 307 ID_VAR_TOKENS = { 308 TokenType.VAR, 309 TokenType.ANTI, 310 TokenType.APPLY, 311 TokenType.ASC, 312 TokenType.AUTO_INCREMENT, 313 TokenType.BEGIN, 314 TokenType.BPCHAR, 315 TokenType.CACHE, 316 TokenType.CASE, 317 TokenType.COLLATE, 318 TokenType.COMMAND, 319 TokenType.COMMENT, 320 TokenType.COMMIT, 321 TokenType.CONSTRAINT, 322 TokenType.DEFAULT, 323 TokenType.DELETE, 324 TokenType.DESC, 325 TokenType.DESCRIBE, 326 TokenType.DICTIONARY, 327 TokenType.DIV, 328 TokenType.END, 329 TokenType.EXECUTE, 330 TokenType.ESCAPE, 331 TokenType.FALSE, 332 TokenType.FIRST, 333 TokenType.FILTER, 334 TokenType.FINAL, 335 TokenType.FORMAT, 336 TokenType.FULL, 337 TokenType.IS, 338 TokenType.ISNULL, 339 TokenType.INTERVAL, 340 TokenType.KEEP, 341 TokenType.KILL, 342 TokenType.LEFT, 343 TokenType.LOAD, 344 TokenType.MERGE, 345 TokenType.NATURAL, 346 TokenType.NEXT, 347 TokenType.OFFSET, 348 TokenType.OPERATOR, 349 TokenType.ORDINALITY, 350 TokenType.OVERLAPS, 351 TokenType.OVERWRITE, 352 TokenType.PARTITION, 353 TokenType.PERCENT, 354 TokenType.PIVOT, 355 TokenType.PRAGMA, 356 TokenType.RANGE, 357 TokenType.RECURSIVE, 358 TokenType.REFERENCES, 359 TokenType.REFRESH, 360 
TokenType.REPLACE, 361 TokenType.RIGHT, 362 TokenType.ROW, 363 TokenType.ROWS, 364 TokenType.SEMI, 365 TokenType.SET, 366 TokenType.SETTINGS, 367 TokenType.SHOW, 368 TokenType.TEMPORARY, 369 TokenType.TOP, 370 TokenType.TRUE, 371 TokenType.UNIQUE, 372 TokenType.UNPIVOT, 373 TokenType.UPDATE, 374 TokenType.USE, 375 TokenType.VOLATILE, 376 TokenType.WINDOW, 377 *CREATABLES, 378 *SUBQUERY_PREDICATES, 379 *TYPE_TOKENS, 380 *NO_PAREN_FUNCTIONS, 381 } 382 383 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 384 385 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 386 TokenType.ANTI, 387 TokenType.APPLY, 388 TokenType.ASOF, 389 TokenType.FULL, 390 TokenType.LEFT, 391 TokenType.LOCK, 392 TokenType.NATURAL, 393 TokenType.OFFSET, 394 TokenType.RIGHT, 395 TokenType.SEMI, 396 TokenType.WINDOW, 397 } 398 399 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 400 401 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 402 403 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 404 405 FUNC_TOKENS = { 406 TokenType.COLLATE, 407 TokenType.COMMAND, 408 TokenType.CURRENT_DATE, 409 TokenType.CURRENT_DATETIME, 410 TokenType.CURRENT_TIMESTAMP, 411 TokenType.CURRENT_TIME, 412 TokenType.CURRENT_USER, 413 TokenType.FILTER, 414 TokenType.FIRST, 415 TokenType.FORMAT, 416 TokenType.GLOB, 417 TokenType.IDENTIFIER, 418 TokenType.INDEX, 419 TokenType.ISNULL, 420 TokenType.ILIKE, 421 TokenType.INSERT, 422 TokenType.LIKE, 423 TokenType.MERGE, 424 TokenType.OFFSET, 425 TokenType.PRIMARY_KEY, 426 TokenType.RANGE, 427 TokenType.REPLACE, 428 TokenType.RLIKE, 429 TokenType.ROW, 430 TokenType.UNNEST, 431 TokenType.VAR, 432 TokenType.LEFT, 433 TokenType.RIGHT, 434 TokenType.DATE, 435 TokenType.DATETIME, 436 TokenType.TABLE, 437 TokenType.TIMESTAMP, 438 TokenType.TIMESTAMPTZ, 439 TokenType.WINDOW, 440 TokenType.XOR, 441 *TYPE_TOKENS, 442 *SUBQUERY_PREDICATES, 443 } 444 445 CONJUNCTION = { 446 TokenType.AND: exp.And, 447 TokenType.OR: exp.Or, 448 } 449 450 EQUALITY = { 451 TokenType.COLON_EQ: exp.PropertyEQ, 452 TokenType.EQ: exp.EQ, 453 TokenType.NEQ: exp.NEQ, 454 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 455 } 456 457 COMPARISON = { 458 TokenType.GT: exp.GT, 459 TokenType.GTE: exp.GTE, 460 TokenType.LT: exp.LT, 461 TokenType.LTE: exp.LTE, 462 } 463 464 BITWISE = { 465 TokenType.AMP: exp.BitwiseAnd, 466 TokenType.CARET: exp.BitwiseXor, 467 TokenType.PIPE: exp.BitwiseOr, 468 } 469 470 TERM = { 471 TokenType.DASH: exp.Sub, 472 TokenType.PLUS: exp.Add, 473 TokenType.MOD: exp.Mod, 474 TokenType.COLLATE: exp.Collate, 475 } 476 477 FACTOR = { 478 TokenType.DIV: exp.IntDiv, 479 TokenType.LR_ARROW: exp.Distance, 480 TokenType.SLASH: exp.Div, 481 TokenType.STAR: exp.Mul, 482 } 483 484 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 485 486 TIMES = { 487 TokenType.TIME, 488 TokenType.TIMETZ, 489 } 490 491 TIMESTAMPS = { 492 TokenType.TIMESTAMP, 493 TokenType.TIMESTAMPTZ, 494 TokenType.TIMESTAMPLTZ, 495 *TIMES, 496 } 497 498 SET_OPERATIONS = { 499 TokenType.UNION, 500 TokenType.INTERSECT, 501 TokenType.EXCEPT, 502 } 503 504 JOIN_METHODS = { 505 TokenType.NATURAL, 506 TokenType.ASOF, 507 } 508 509 JOIN_SIDES = { 510 TokenType.LEFT, 511 TokenType.RIGHT, 512 TokenType.FULL, 513 } 514 515 JOIN_KINDS = { 516 TokenType.INNER, 517 TokenType.OUTER, 518 TokenType.CROSS, 519 TokenType.SEMI, 520 TokenType.ANTI, 521 } 522 523 JOIN_HINTS: t.Set[str] = set() 524 525 LAMBDAS = { 526 TokenType.ARROW: lambda self, expressions: self.expression( 527 exp.Lambda, 528 this=self._replace_lambda( 529 self._parse_conjunction(), 530 {node.name for node in 
expressions}, 531 ), 532 expressions=expressions, 533 ), 534 TokenType.FARROW: lambda self, expressions: self.expression( 535 exp.Kwarg, 536 this=exp.var(expressions[0].name), 537 expression=self._parse_conjunction(), 538 ), 539 } 540 541 COLUMN_OPERATORS = { 542 TokenType.DOT: None, 543 TokenType.DCOLON: lambda self, this, to: self.expression( 544 exp.Cast if self.STRICT_CAST else exp.TryCast, 545 this=this, 546 to=to, 547 ), 548 TokenType.ARROW: lambda self, this, path: self.expression( 549 exp.JSONExtract, 550 this=this, 551 expression=self.dialect.to_json_path(path), 552 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 553 ), 554 TokenType.DARROW: lambda self, this, path: self.expression( 555 exp.JSONExtractScalar, 556 this=this, 557 expression=self.dialect.to_json_path(path), 558 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 559 ), 560 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 561 exp.JSONBExtract, 562 this=this, 563 expression=path, 564 ), 565 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 566 exp.JSONBExtractScalar, 567 this=this, 568 expression=path, 569 ), 570 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 571 exp.JSONBContains, 572 this=this, 573 expression=key, 574 ), 575 } 576 577 EXPRESSION_PARSERS = { 578 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 579 exp.Column: lambda self: self._parse_column(), 580 exp.Condition: lambda self: self._parse_conjunction(), 581 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 582 exp.Expression: lambda self: self._parse_statement(), 583 exp.From: lambda self: self._parse_from(), 584 exp.Group: lambda self: self._parse_group(), 585 exp.Having: lambda self: self._parse_having(), 586 exp.Identifier: lambda self: self._parse_id_var(), 587 exp.Join: lambda self: self._parse_join(), 588 exp.Lambda: lambda self: self._parse_lambda(), 589 exp.Lateral: lambda self: self._parse_lateral(), 590 exp.Limit: lambda self: self._parse_limit(), 591 exp.Offset: lambda self: self._parse_offset(), 592 exp.Order: lambda self: self._parse_order(), 593 exp.Ordered: lambda self: self._parse_ordered(), 594 exp.Properties: lambda self: self._parse_properties(), 595 exp.Qualify: lambda self: self._parse_qualify(), 596 exp.Returning: lambda self: self._parse_returning(), 597 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 598 exp.Table: lambda self: self._parse_table_parts(), 599 exp.TableAlias: lambda self: self._parse_table_alias(), 600 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 601 exp.Where: lambda self: self._parse_where(), 602 exp.Window: lambda self: self._parse_named_window(), 603 exp.With: lambda self: self._parse_with(), 604 "JOIN_TYPE": lambda self: self._parse_join_parts(), 605 } 606 607 STATEMENT_PARSERS = { 608 TokenType.ALTER: lambda self: self._parse_alter(), 609 TokenType.BEGIN: lambda self: self._parse_transaction(), 610 TokenType.CACHE: lambda self: self._parse_cache(), 611 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 612 TokenType.COMMENT: lambda self: self._parse_comment(), 613 TokenType.CREATE: lambda self: self._parse_create(), 614 TokenType.DELETE: lambda self: self._parse_delete(), 615 TokenType.DESC: lambda self: self._parse_describe(), 616 TokenType.DESCRIBE: lambda self: self._parse_describe(), 617 TokenType.DROP: lambda self: self._parse_drop(), 618 TokenType.INSERT: lambda self: self._parse_insert(), 619 TokenType.KILL: lambda self: self._parse_kill(), 620 
TokenType.LOAD: lambda self: self._parse_load(), 621 TokenType.MERGE: lambda self: self._parse_merge(), 622 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 623 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 624 TokenType.REFRESH: lambda self: self._parse_refresh(), 625 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 626 TokenType.SET: lambda self: self._parse_set(), 627 TokenType.UNCACHE: lambda self: self._parse_uncache(), 628 TokenType.UPDATE: lambda self: self._parse_update(), 629 TokenType.USE: lambda self: self.expression( 630 exp.Use, 631 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 632 and exp.var(self._prev.text), 633 this=self._parse_table(schema=False), 634 ), 635 } 636 637 UNARY_PARSERS = { 638 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 639 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 640 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 641 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 642 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 643 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 644 } 645 646 PRIMARY_PARSERS = { 647 TokenType.STRING: lambda self, token: self.expression( 648 exp.Literal, this=token.text, is_string=True 649 ), 650 TokenType.NUMBER: lambda self, token: self.expression( 651 exp.Literal, this=token.text, is_string=False 652 ), 653 TokenType.STAR: lambda self, _: self.expression( 654 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 655 ), 656 TokenType.NULL: lambda self, _: self.expression(exp.Null), 657 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 658 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 659 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 660 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 661 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 662 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 663 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 664 exp.National, this=token.text 665 ), 666 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 667 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 668 exp.RawString, this=token.text 669 ), 670 TokenType.UNICODE_STRING: lambda self, token: self.expression( 671 exp.UnicodeString, 672 this=token.text, 673 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 674 ), 675 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 676 } 677 678 PLACEHOLDER_PARSERS = { 679 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 680 TokenType.PARAMETER: lambda self: self._parse_parameter(), 681 TokenType.COLON: lambda self: ( 682 self.expression(exp.Placeholder, this=self._prev.text) 683 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 684 else None 685 ), 686 } 687 688 RANGE_PARSERS = { 689 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 690 TokenType.GLOB: binary_range_parser(exp.Glob), 691 TokenType.ILIKE: binary_range_parser(exp.ILike), 692 TokenType.IN: lambda self, this: self._parse_in(this), 693 
TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 694 TokenType.IS: lambda self, this: self._parse_is(this), 695 TokenType.LIKE: binary_range_parser(exp.Like), 696 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 697 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 698 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 699 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 700 } 701 702 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 703 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 704 "AUTO": lambda self: self._parse_auto_property(), 705 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 706 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 707 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 708 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 709 "CHECKSUM": lambda self: self._parse_checksum(), 710 "CLUSTER BY": lambda self: self._parse_cluster(), 711 "CLUSTERED": lambda self: self._parse_clustered_by(), 712 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 713 exp.CollateProperty, **kwargs 714 ), 715 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 716 "CONTAINS": lambda self: self._parse_contains_property(), 717 "COPY": lambda self: self._parse_copy_property(), 718 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 719 "DEFINER": lambda self: self._parse_definer(), 720 "DETERMINISTIC": lambda self: self.expression( 721 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 722 ), 723 "DISTKEY": lambda self: self._parse_distkey(), 724 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 725 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 726 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 727 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 728 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 729 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 730 "FREESPACE": lambda self: self._parse_freespace(), 731 "HEAP": lambda self: self.expression(exp.HeapProperty), 732 "IMMUTABLE": lambda self: self.expression( 733 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 734 ), 735 "INHERITS": lambda self: self.expression( 736 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 737 ), 738 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 739 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 740 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 741 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 742 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 743 "LIKE": lambda self: self._parse_create_like(), 744 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 745 "LOCK": lambda self: self._parse_locking(), 746 "LOCKING": lambda self: self._parse_locking(), 747 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 748 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 749 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 750 "MODIFIES": lambda self: self._parse_modifies_property(), 751 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 752 "NO": 
lambda self: self._parse_no_property(), 753 "ON": lambda self: self._parse_on_property(), 754 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 755 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 756 "PARTITION": lambda self: self._parse_partitioned_of(), 757 "PARTITION BY": lambda self: self._parse_partitioned_by(), 758 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 759 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 760 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 761 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 762 "READS": lambda self: self._parse_reads_property(), 763 "REMOTE": lambda self: self._parse_remote_with_connection(), 764 "RETURNS": lambda self: self._parse_returns(), 765 "ROW": lambda self: self._parse_row(), 766 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 767 "SAMPLE": lambda self: self.expression( 768 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 769 ), 770 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 771 "SETTINGS": lambda self: self.expression( 772 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 773 ), 774 "SORTKEY": lambda self: self._parse_sortkey(), 775 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 776 "STABLE": lambda self: self.expression( 777 exp.StabilityProperty, this=exp.Literal.string("STABLE") 778 ), 779 "STORED": lambda self: self._parse_stored(), 780 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 781 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 782 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 783 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 784 "TO": lambda self: self._parse_to_table(), 785 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 786 "TRANSFORM": lambda self: self.expression( 787 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 788 ), 789 "TTL": lambda self: self._parse_ttl(), 790 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 791 "VOLATILE": lambda self: self._parse_volatile_property(), 792 "WITH": lambda self: self._parse_with_property(), 793 } 794 795 CONSTRAINT_PARSERS = { 796 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 797 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 798 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 799 "CHARACTER SET": lambda self: self.expression( 800 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 801 ), 802 "CHECK": lambda self: self.expression( 803 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 804 ), 805 "COLLATE": lambda self: self.expression( 806 exp.CollateColumnConstraint, this=self._parse_var() 807 ), 808 "COMMENT": lambda self: self.expression( 809 exp.CommentColumnConstraint, this=self._parse_string() 810 ), 811 "COMPRESS": lambda self: self._parse_compress(), 812 "CLUSTERED": lambda self: self.expression( 813 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 814 ), 815 "NONCLUSTERED": lambda self: self.expression( 816 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 817 ), 818 "DEFAULT": lambda self: self.expression( 819 exp.DefaultColumnConstraint, this=self._parse_bitwise() 820 ), 
821 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 822 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 823 "FORMAT": lambda self: self.expression( 824 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 825 ), 826 "GENERATED": lambda self: self._parse_generated_as_identity(), 827 "IDENTITY": lambda self: self._parse_auto_increment(), 828 "INLINE": lambda self: self._parse_inline(), 829 "LIKE": lambda self: self._parse_create_like(), 830 "NOT": lambda self: self._parse_not_constraint(), 831 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 832 "ON": lambda self: ( 833 self._match(TokenType.UPDATE) 834 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 835 ) 836 or self.expression(exp.OnProperty, this=self._parse_id_var()), 837 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 838 "PERIOD": lambda self: self._parse_period_for_system_time(), 839 "PRIMARY KEY": lambda self: self._parse_primary_key(), 840 "REFERENCES": lambda self: self._parse_references(match=False), 841 "TITLE": lambda self: self.expression( 842 exp.TitleColumnConstraint, this=self._parse_var_or_string() 843 ), 844 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 845 "UNIQUE": lambda self: self._parse_unique(), 846 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 847 "WITH": lambda self: self.expression( 848 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 849 ), 850 } 851 852 ALTER_PARSERS = { 853 "ADD": lambda self: self._parse_alter_table_add(), 854 "ALTER": lambda self: self._parse_alter_table_alter(), 855 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 856 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 857 "DROP": lambda self: self._parse_alter_table_drop(), 858 "RENAME": lambda self: self._parse_alter_table_rename(), 859 } 860 861 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"} 862 863 NO_PAREN_FUNCTION_PARSERS = { 864 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 865 "CASE": lambda self: self._parse_case(), 866 "IF": lambda self: self._parse_if(), 867 "NEXT": lambda self: self._parse_next_value_for(), 868 } 869 870 INVALID_FUNC_NAME_TOKENS = { 871 TokenType.IDENTIFIER, 872 TokenType.STRING, 873 } 874 875 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 876 877 FUNCTION_PARSERS = { 878 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 879 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 880 "DECODE": lambda self: self._parse_decode(), 881 "EXTRACT": lambda self: self._parse_extract(), 882 "JSON_OBJECT": lambda self: self._parse_json_object(), 883 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 884 "JSON_TABLE": lambda self: self._parse_json_table(), 885 "MATCH": lambda self: self._parse_match_against(), 886 "OPENJSON": lambda self: self._parse_open_json(), 887 "POSITION": lambda self: self._parse_position(), 888 "PREDICT": lambda self: self._parse_predict(), 889 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 890 "STRING_AGG": lambda self: self._parse_string_agg(), 891 "SUBSTRING": lambda self: self._parse_substring(), 892 "TRIM": lambda self: self._parse_trim(), 893 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 894 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 
895 } 896 897 QUERY_MODIFIER_PARSERS = { 898 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 899 TokenType.WHERE: lambda self: ("where", self._parse_where()), 900 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 901 TokenType.HAVING: lambda self: ("having", self._parse_having()), 902 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 903 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 904 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 905 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 906 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 907 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 908 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 909 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 910 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 911 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 912 TokenType.CLUSTER_BY: lambda self: ( 913 "cluster", 914 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 915 ), 916 TokenType.DISTRIBUTE_BY: lambda self: ( 917 "distribute", 918 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 919 ), 920 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 921 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 922 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 923 } 924 925 SET_PARSERS = { 926 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 927 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 928 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 929 "TRANSACTION": lambda self: self._parse_set_transaction(), 930 } 931 932 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 933 934 TYPE_LITERAL_PARSERS = { 935 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 936 } 937 938 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 939 940 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 941 942 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 943 944 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 945 TRANSACTION_CHARACTERISTICS = { 946 "ISOLATION LEVEL REPEATABLE READ", 947 "ISOLATION LEVEL READ COMMITTED", 948 "ISOLATION LEVEL READ UNCOMMITTED", 949 "ISOLATION LEVEL SERIALIZABLE", 950 "READ WRITE", 951 "READ ONLY", 952 } 953 954 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 955 956 CLONE_KEYWORDS = {"CLONE", "COPY"} 957 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 958 959 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 960 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 961 962 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 963 964 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 965 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 966 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 967 968 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 969 970 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 971 972 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 973 974 DISTINCT_TOKENS = {TokenType.DISTINCT} 975 976 NULL_TOKENS = {TokenType.NULL} 977 978 
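# Tokens that may serve as the offset alias in UNNEST ... WITH OFFSET; set-operation keywords are excluded so a trailing UNION / INTERSECT / EXCEPT is not consumed as an alias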
UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 979 980 STRICT_CAST = True 981 982 PREFIXED_PIVOT_COLUMNS = False 983 IDENTIFY_PIVOT_STRINGS = False 984 985 LOG_DEFAULTS_TO_LN = False 986 987 # Whether ADD is present for each column added by ALTER TABLE 988 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 989 990 # Whether the table sample clause expects CSV syntax 991 TABLESAMPLE_CSV = False 992 993 # Whether the SET command needs a delimiter (e.g. "=") for assignments 994 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 995 996 # Whether the TRIM function expects the characters to trim as its first argument 997 TRIM_PATTERN_FIRST = False 998 999 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1000 STRING_ALIASES = False 1001 1002 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1003 MODIFIERS_ATTACHED_TO_UNION = True 1004 UNION_MODIFIERS = {"order", "limit", "offset"} 1005 1006 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1007 NO_PAREN_IF_COMMANDS = True 1008 1009 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1010 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1011 1012 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1013 # If this is True and '(' is not found, the keyword will be treated as an identifier 1014 VALUES_FOLLOWED_BY_PAREN = True 1015 1016 __slots__ = ( 1017 "error_level", 1018 "error_message_context", 1019 "max_errors", 1020 "dialect", 1021 "sql", 1022 "errors", 1023 "_tokens", 1024 "_index", 1025 "_curr", 1026 "_next", 1027 "_prev", 1028 "_prev_comments", 1029 ) 1030 1031 # Autofilled 1032 SHOW_TRIE: t.Dict = {} 1033 SET_TRIE: t.Dict = {} 1034 1035 def __init__( 1036 self, 1037 error_level: t.Optional[ErrorLevel] = None, 1038 error_message_context: int = 100, 1039 max_errors: int = 3, 1040 dialect: DialectType = None, 1041 ): 1042 from sqlglot.dialects import Dialect 1043 1044 self.error_level = error_level or ErrorLevel.IMMEDIATE 1045 self.error_message_context = error_message_context 1046 self.max_errors = max_errors 1047 self.dialect = Dialect.get_or_raise(dialect) 1048 self.reset() 1049 1050 def reset(self): 1051 self.sql = "" 1052 self.errors = [] 1053 self._tokens = [] 1054 self._index = 0 1055 self._curr = None 1056 self._next = None 1057 self._prev = None 1058 self._prev_comments = None 1059 1060 def parse( 1061 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1062 ) -> t.List[t.Optional[exp.Expression]]: 1063 """ 1064 Parses a list of tokens and returns a list of syntax trees, one tree 1065 per parsed SQL statement. 1066 1067 Args: 1068 raw_tokens: The list of tokens. 1069 sql: The original SQL string, used to produce helpful debug messages. 1070 1071 Returns: 1072 The list of the produced syntax trees. 1073 """ 1074 return self._parse( 1075 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1076 ) 1077 1078 def parse_into( 1079 self, 1080 expression_types: exp.IntoType, 1081 raw_tokens: t.List[Token], 1082 sql: t.Optional[str] = None, 1083 ) -> t.List[t.Optional[exp.Expression]]: 1084 """ 1085 Parses a list of tokens into a given Expression type. If a collection of Expression 1086 types is given instead, this method will try to parse the token list into each one 1087 of them, stopping at the first for which the parsing succeeds. 1088 1089 Args: 1090 expression_types: The expression type(s) to try and parse the token list into. 
1091 raw_tokens: The list of tokens. 1092 sql: The original SQL string, used to produce helpful debug messages. 1093 1094 Returns: 1095 The target Expression. 1096 """ 1097 errors = [] 1098 for expression_type in ensure_list(expression_types): 1099 parser = self.EXPRESSION_PARSERS.get(expression_type) 1100 if not parser: 1101 raise TypeError(f"No parser registered for {expression_type}") 1102 1103 try: 1104 return self._parse(parser, raw_tokens, sql) 1105 except ParseError as e: 1106 e.errors[0]["into_expression"] = expression_type 1107 errors.append(e) 1108 1109 raise ParseError( 1110 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1111 errors=merge_errors(errors), 1112 ) from errors[-1] 1113 1114 def _parse( 1115 self, 1116 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1117 raw_tokens: t.List[Token], 1118 sql: t.Optional[str] = None, 1119 ) -> t.List[t.Optional[exp.Expression]]: 1120 self.reset() 1121 self.sql = sql or "" 1122 1123 total = len(raw_tokens) 1124 chunks: t.List[t.List[Token]] = [[]] 1125 1126 for i, token in enumerate(raw_tokens): 1127 if token.token_type == TokenType.SEMICOLON: 1128 if i < total - 1: 1129 chunks.append([]) 1130 else: 1131 chunks[-1].append(token) 1132 1133 expressions = [] 1134 1135 for tokens in chunks: 1136 self._index = -1 1137 self._tokens = tokens 1138 self._advance() 1139 1140 expressions.append(parse_method(self)) 1141 1142 if self._index < len(self._tokens): 1143 self.raise_error("Invalid expression / Unexpected token") 1144 1145 self.check_errors() 1146 1147 return expressions 1148 1149 def check_errors(self) -> None: 1150 """Logs or raises any found errors, depending on the chosen error level setting.""" 1151 if self.error_level == ErrorLevel.WARN: 1152 for error in self.errors: 1153 logger.error(str(error)) 1154 elif self.error_level == ErrorLevel.RAISE and self.errors: 1155 raise ParseError( 1156 concat_messages(self.errors, self.max_errors), 1157 errors=merge_errors(self.errors), 1158 ) 1159 1160 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1161 """ 1162 Appends an error in the list of recorded errors or raises it, depending on the chosen 1163 error level setting. 1164 """ 1165 token = token or self._curr or self._prev or Token.string("") 1166 start = token.start 1167 end = token.end + 1 1168 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1169 highlight = self.sql[start:end] 1170 end_context = self.sql[end : end + self.error_message_context] 1171 1172 error = ParseError.new( 1173 f"{message}. Line {token.line}, Col: {token.col}.\n" 1174 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1175 description=message, 1176 line=token.line, 1177 col=token.col, 1178 start_context=start_context, 1179 highlight=highlight, 1180 end_context=end_context, 1181 ) 1182 1183 if self.error_level == ErrorLevel.IMMEDIATE: 1184 raise error 1185 1186 self.errors.append(error) 1187 1188 def expression( 1189 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1190 ) -> E: 1191 """ 1192 Creates a new, validated Expression. 1193 1194 Args: 1195 exp_class: The expression class to instantiate. 1196 comments: An optional list of comments to attach to the expression. 1197 kwargs: The arguments to set for the expression along with their respective values. 1198 1199 Returns: 1200 The target expression. 
1201 """ 1202 instance = exp_class(**kwargs) 1203 instance.add_comments(comments) if comments else self._add_comments(instance) 1204 return self.validate_expression(instance) 1205 1206 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1207 if expression and self._prev_comments: 1208 expression.add_comments(self._prev_comments) 1209 self._prev_comments = None 1210 1211 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1212 """ 1213 Validates an Expression, making sure that all its mandatory arguments are set. 1214 1215 Args: 1216 expression: The expression to validate. 1217 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1218 1219 Returns: 1220 The validated expression. 1221 """ 1222 if self.error_level != ErrorLevel.IGNORE: 1223 for error_message in expression.error_messages(args): 1224 self.raise_error(error_message) 1225 1226 return expression 1227 1228 def _find_sql(self, start: Token, end: Token) -> str: 1229 return self.sql[start.start : end.end + 1] 1230 1231 def _is_connected(self) -> bool: 1232 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1233 1234 def _advance(self, times: int = 1) -> None: 1235 self._index += times 1236 self._curr = seq_get(self._tokens, self._index) 1237 self._next = seq_get(self._tokens, self._index + 1) 1238 1239 if self._index > 0: 1240 self._prev = self._tokens[self._index - 1] 1241 self._prev_comments = self._prev.comments 1242 else: 1243 self._prev = None 1244 self._prev_comments = None 1245 1246 def _retreat(self, index: int) -> None: 1247 if index != self._index: 1248 self._advance(index - self._index) 1249 1250 def _warn_unsupported(self) -> None: 1251 if len(self._tokens) <= 1: 1252 return 1253 1254 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1255 # interested in emitting a warning for the one currently being processed. 1256 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1257 1258 logger.warning( 1259 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
1260 ) 1261 1262 def _parse_command(self) -> exp.Command: 1263 self._warn_unsupported() 1264 return self.expression( 1265 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1266 ) 1267 1268 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1269 start = self._prev 1270 exists = self._parse_exists() if allow_exists else None 1271 1272 self._match(TokenType.ON) 1273 1274 kind = self._match_set(self.CREATABLES) and self._prev 1275 if not kind: 1276 return self._parse_as_command(start) 1277 1278 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1279 this = self._parse_user_defined_function(kind=kind.token_type) 1280 elif kind.token_type == TokenType.TABLE: 1281 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1282 elif kind.token_type == TokenType.COLUMN: 1283 this = self._parse_column() 1284 else: 1285 this = self._parse_id_var() 1286 1287 self._match(TokenType.IS) 1288 1289 return self.expression( 1290 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1291 ) 1292 1293 def _parse_to_table( 1294 self, 1295 ) -> exp.ToTableProperty: 1296 table = self._parse_table_parts(schema=True) 1297 return self.expression(exp.ToTableProperty, this=table) 1298 1299 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1300 def _parse_ttl(self) -> exp.Expression: 1301 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1302 this = self._parse_bitwise() 1303 1304 if self._match_text_seq("DELETE"): 1305 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1306 if self._match_text_seq("RECOMPRESS"): 1307 return self.expression( 1308 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1309 ) 1310 if self._match_text_seq("TO", "DISK"): 1311 return self.expression( 1312 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1313 ) 1314 if self._match_text_seq("TO", "VOLUME"): 1315 return self.expression( 1316 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1317 ) 1318 1319 return this 1320 1321 expressions = self._parse_csv(_parse_ttl_action) 1322 where = self._parse_where() 1323 group = self._parse_group() 1324 1325 aggregates = None 1326 if group and self._match(TokenType.SET): 1327 aggregates = self._parse_csv(self._parse_set_item) 1328 1329 return self.expression( 1330 exp.MergeTreeTTL, 1331 expressions=expressions, 1332 where=where, 1333 group=group, 1334 aggregates=aggregates, 1335 ) 1336 1337 def _parse_statement(self) -> t.Optional[exp.Expression]: 1338 if self._curr is None: 1339 return None 1340 1341 if self._match_set(self.STATEMENT_PARSERS): 1342 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1343 1344 if self._match_set(Tokenizer.COMMANDS): 1345 return self._parse_command() 1346 1347 expression = self._parse_expression() 1348 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1349 return self._parse_query_modifiers(expression) 1350 1351 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1352 start = self._prev 1353 temporary = self._match(TokenType.TEMPORARY) 1354 materialized = self._match_text_seq("MATERIALIZED") 1355 1356 kind = self._match_set(self.CREATABLES) and self._prev.text 1357 if not kind: 1358 return self._parse_as_command(start) 1359 1360 return self.expression( 1361 exp.Drop, 1362 comments=start.comments, 1363 exists=exists or self._parse_exists(), 1364 this=self._parse_table( 1365 
schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1366 ), 1367 kind=kind, 1368 temporary=temporary, 1369 materialized=materialized, 1370 cascade=self._match_text_seq("CASCADE"), 1371 constraints=self._match_text_seq("CONSTRAINTS"), 1372 purge=self._match_text_seq("PURGE"), 1373 ) 1374 1375 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1376 return ( 1377 self._match_text_seq("IF") 1378 and (not not_ or self._match(TokenType.NOT)) 1379 and self._match(TokenType.EXISTS) 1380 ) 1381 1382 def _parse_create(self) -> exp.Create | exp.Command: 1383 # Note: this can't be None because we've matched a statement parser 1384 start = self._prev 1385 comments = self._prev_comments 1386 1387 replace = ( 1388 start.token_type == TokenType.REPLACE 1389 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1390 or self._match_pair(TokenType.OR, TokenType.ALTER) 1391 ) 1392 unique = self._match(TokenType.UNIQUE) 1393 1394 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1395 self._advance() 1396 1397 properties = None 1398 create_token = self._match_set(self.CREATABLES) and self._prev 1399 1400 if not create_token: 1401 # exp.Properties.Location.POST_CREATE 1402 properties = self._parse_properties() 1403 create_token = self._match_set(self.CREATABLES) and self._prev 1404 1405 if not properties or not create_token: 1406 return self._parse_as_command(start) 1407 1408 exists = self._parse_exists(not_=True) 1409 this = None 1410 expression: t.Optional[exp.Expression] = None 1411 indexes = None 1412 no_schema_binding = None 1413 begin = None 1414 end = None 1415 clone = None 1416 1417 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1418 nonlocal properties 1419 if properties and temp_props: 1420 properties.expressions.extend(temp_props.expressions) 1421 elif temp_props: 1422 properties = temp_props 1423 1424 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1425 this = self._parse_user_defined_function(kind=create_token.token_type) 1426 1427 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1428 extend_props(self._parse_properties()) 1429 1430 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1431 1432 if not expression: 1433 if self._match(TokenType.COMMAND): 1434 expression = self._parse_as_command(self._prev) 1435 else: 1436 begin = self._match(TokenType.BEGIN) 1437 return_ = self._match_text_seq("RETURN") 1438 1439 if self._match(TokenType.STRING, advance=False): 1440 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1441 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1442 expression = self._parse_string() 1443 extend_props(self._parse_properties()) 1444 else: 1445 expression = self._parse_statement() 1446 1447 end = self._match_text_seq("END") 1448 1449 if return_: 1450 expression = self.expression(exp.Return, this=expression) 1451 elif create_token.token_type == TokenType.INDEX: 1452 this = self._parse_index(index=self._parse_id_var()) 1453 elif create_token.token_type in self.DB_CREATABLES: 1454 table_parts = self._parse_table_parts( 1455 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1456 ) 1457 1458 # exp.Properties.Location.POST_NAME 1459 self._match(TokenType.COMMA) 1460 extend_props(self._parse_properties(before=True)) 1461 1462 this = self._parse_schema(this=table_parts) 1463 1464 # exp.Properties.Location.POST_SCHEMA
and POST_WITH 1465 extend_props(self._parse_properties()) 1466 1467 self._match(TokenType.ALIAS) 1468 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1469 # exp.Properties.Location.POST_ALIAS 1470 extend_props(self._parse_properties()) 1471 1472 expression = self._parse_ddl_select() 1473 1474 if create_token.token_type == TokenType.TABLE: 1475 # exp.Properties.Location.POST_EXPRESSION 1476 extend_props(self._parse_properties()) 1477 1478 indexes = [] 1479 while True: 1480 index = self._parse_index() 1481 1482 # exp.Properties.Location.POST_INDEX 1483 extend_props(self._parse_properties()) 1484 1485 if not index: 1486 break 1487 else: 1488 self._match(TokenType.COMMA) 1489 indexes.append(index) 1490 elif create_token.token_type == TokenType.VIEW: 1491 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1492 no_schema_binding = True 1493 1494 shallow = self._match_text_seq("SHALLOW") 1495 1496 if self._match_texts(self.CLONE_KEYWORDS): 1497 copy = self._prev.text.lower() == "copy" 1498 clone = self.expression( 1499 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1500 ) 1501 1502 if self._curr: 1503 return self._parse_as_command(start) 1504 1505 return self.expression( 1506 exp.Create, 1507 comments=comments, 1508 this=this, 1509 kind=create_token.text.upper(), 1510 replace=replace, 1511 unique=unique, 1512 expression=expression, 1513 exists=exists, 1514 properties=properties, 1515 indexes=indexes, 1516 no_schema_binding=no_schema_binding, 1517 begin=begin, 1518 end=end, 1519 clone=clone, 1520 ) 1521 1522 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1523 # only used for teradata currently 1524 self._match(TokenType.COMMA) 1525 1526 kwargs = { 1527 "no": self._match_text_seq("NO"), 1528 "dual": self._match_text_seq("DUAL"), 1529 "before": self._match_text_seq("BEFORE"), 1530 "default": self._match_text_seq("DEFAULT"), 1531 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1532 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1533 "after": self._match_text_seq("AFTER"), 1534 "minimum": self._match_texts(("MIN", "MINIMUM")), 1535 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1536 } 1537 1538 if self._match_texts(self.PROPERTY_PARSERS): 1539 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1540 try: 1541 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1542 except TypeError: 1543 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1544 1545 return None 1546 1547 def _parse_property(self) -> t.Optional[exp.Expression]: 1548 if self._match_texts(self.PROPERTY_PARSERS): 1549 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1550 1551 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1552 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1553 1554 if self._match_text_seq("COMPOUND", "SORTKEY"): 1555 return self._parse_sortkey(compound=True) 1556 1557 if self._match_text_seq("SQL", "SECURITY"): 1558 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1559 1560 index = self._index 1561 key = self._parse_column() 1562 1563 if not self._match(TokenType.EQ): 1564 self._retreat(index) 1565 return None 1566 1567 return self.expression( 1568 exp.Property, 1569 this=key.to_dot() if isinstance(key, exp.Column) else key, 1570 value=self._parse_column() or self._parse_var(any_token=True), 1571 ) 1572 1573 def _parse_stored(self) -> exp.FileFormatProperty: 1574 self._match(TokenType.ALIAS) 1575 1576 
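# Illustrative inputs for this method, assuming a Hive-style dialect (a sketch, not an exhaustive list):
#
#   STORED AS PARQUET
#   STORED AS INPUTFORMAT 'org.x.In' OUTPUTFORMAT 'org.x.Out'
#
# The first form falls through to the plain var/string/number branch below, while the second is wrapped in an InputOutputFormat node.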
input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1577 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1578 1579 return self.expression( 1580 exp.FileFormatProperty, 1581 this=( 1582 self.expression( 1583 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1584 ) 1585 if input_format or output_format 1586 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1587 ), 1588 ) 1589 1590 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1591 self._match(TokenType.EQ) 1592 self._match(TokenType.ALIAS) 1593 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1594 1595 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1596 properties = [] 1597 while True: 1598 if before: 1599 prop = self._parse_property_before() 1600 else: 1601 prop = self._parse_property() 1602 1603 if not prop: 1604 break 1605 for p in ensure_list(prop): 1606 properties.append(p) 1607 1608 if properties: 1609 return self.expression(exp.Properties, expressions=properties) 1610 1611 return None 1612 1613 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1614 return self.expression( 1615 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1616 ) 1617 1618 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1619 if self._index >= 2: 1620 pre_volatile_token = self._tokens[self._index - 2] 1621 else: 1622 pre_volatile_token = None 1623 1624 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1625 return exp.VolatileProperty() 1626 1627 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1628 1629 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1630 self._match_pair(TokenType.EQ, TokenType.ON) 1631 1632 prop = self.expression(exp.WithSystemVersioningProperty) 1633 if self._match(TokenType.L_PAREN): 1634 self._match_text_seq("HISTORY_TABLE", "=") 1635 prop.set("this", self._parse_table_parts()) 1636 1637 if self._match(TokenType.COMMA): 1638 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1639 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1640 1641 self._match_r_paren() 1642 1643 return prop 1644 1645 def _parse_with_property( 1646 self, 1647 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1648 if self._match(TokenType.L_PAREN, advance=False): 1649 return self._parse_wrapped_csv(self._parse_property) 1650 1651 if self._match_text_seq("JOURNAL"): 1652 return self._parse_withjournaltable() 1653 1654 if self._match_text_seq("DATA"): 1655 return self._parse_withdata(no=False) 1656 elif self._match_text_seq("NO", "DATA"): 1657 return self._parse_withdata(no=True) 1658 1659 if not self._next: 1660 return None 1661 1662 return self._parse_withisolatedloading() 1663 1664 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1665 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1666 self._match(TokenType.EQ) 1667 1668 user = self._parse_id_var() 1669 self._match(TokenType.PARAMETER) 1670 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1671 1672 if not user or not host: 1673 return None 1674 1675 return exp.DefinerProperty(this=f"{user}@{host}") 1676 1677 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1678 self._match(TokenType.TABLE) 1679 
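# Teradata form targeted here (illustrative): WITH JOURNAL TABLE = db.journal_tbl
# The JOURNAL keyword was already consumed by _parse_with_property; TABLE and "=" are matched optionally below.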
self._match(TokenType.EQ) 1680 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1681 1682 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1683 return self.expression(exp.LogProperty, no=no) 1684 1685 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1686 return self.expression(exp.JournalProperty, **kwargs) 1687 1688 def _parse_checksum(self) -> exp.ChecksumProperty: 1689 self._match(TokenType.EQ) 1690 1691 on = None 1692 if self._match(TokenType.ON): 1693 on = True 1694 elif self._match_text_seq("OFF"): 1695 on = False 1696 1697 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1698 1699 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 1700 return self.expression( 1701 exp.Cluster, 1702 expressions=( 1703 self._parse_wrapped_csv(self._parse_ordered) 1704 if wrapped 1705 else self._parse_csv(self._parse_ordered) 1706 ), 1707 ) 1708 1709 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1710 self._match_text_seq("BY") 1711 1712 self._match_l_paren() 1713 expressions = self._parse_csv(self._parse_column) 1714 self._match_r_paren() 1715 1716 if self._match_text_seq("SORTED", "BY"): 1717 self._match_l_paren() 1718 sorted_by = self._parse_csv(self._parse_ordered) 1719 self._match_r_paren() 1720 else: 1721 sorted_by = None 1722 1723 self._match(TokenType.INTO) 1724 buckets = self._parse_number() 1725 self._match_text_seq("BUCKETS") 1726 1727 return self.expression( 1728 exp.ClusteredByProperty, 1729 expressions=expressions, 1730 sorted_by=sorted_by, 1731 buckets=buckets, 1732 ) 1733 1734 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1735 if not self._match_text_seq("GRANTS"): 1736 self._retreat(self._index - 1) 1737 return None 1738 1739 return self.expression(exp.CopyGrantsProperty) 1740 1741 def _parse_freespace(self) -> exp.FreespaceProperty: 1742 self._match(TokenType.EQ) 1743 return self.expression( 1744 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1745 ) 1746 1747 def _parse_mergeblockratio( 1748 self, no: bool = False, default: bool = False 1749 ) -> exp.MergeBlockRatioProperty: 1750 if self._match(TokenType.EQ): 1751 return self.expression( 1752 exp.MergeBlockRatioProperty, 1753 this=self._parse_number(), 1754 percent=self._match(TokenType.PERCENT), 1755 ) 1756 1757 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1758 1759 def _parse_datablocksize( 1760 self, 1761 default: t.Optional[bool] = None, 1762 minimum: t.Optional[bool] = None, 1763 maximum: t.Optional[bool] = None, 1764 ) -> exp.DataBlocksizeProperty: 1765 self._match(TokenType.EQ) 1766 size = self._parse_number() 1767 1768 units = None 1769 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1770 units = self._prev.text 1771 1772 return self.expression( 1773 exp.DataBlocksizeProperty, 1774 size=size, 1775 units=units, 1776 default=default, 1777 minimum=minimum, 1778 maximum=maximum, 1779 ) 1780 1781 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1782 self._match(TokenType.EQ) 1783 always = self._match_text_seq("ALWAYS") 1784 manual = self._match_text_seq("MANUAL") 1785 never = self._match_text_seq("NEVER") 1786 default = self._match_text_seq("DEFAULT") 1787 1788 autotemp = None 1789 if self._match_text_seq("AUTOTEMP"): 1790 autotemp = self._parse_schema() 1791 1792 return self.expression( 1793 exp.BlockCompressionProperty, 1794 always=always, 1795 manual=manual, 1796 never=never, 1797 
default=default, 1798 autotemp=autotemp, 1799 ) 1800 1801 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1802 no = self._match_text_seq("NO") 1803 concurrent = self._match_text_seq("CONCURRENT") 1804 self._match_text_seq("ISOLATED", "LOADING") 1805 for_all = self._match_text_seq("FOR", "ALL") 1806 for_insert = self._match_text_seq("FOR", "INSERT") 1807 for_none = self._match_text_seq("FOR", "NONE") 1808 return self.expression( 1809 exp.IsolatedLoadingProperty, 1810 no=no, 1811 concurrent=concurrent, 1812 for_all=for_all, 1813 for_insert=for_insert, 1814 for_none=for_none, 1815 ) 1816 1817 def _parse_locking(self) -> exp.LockingProperty: 1818 if self._match(TokenType.TABLE): 1819 kind = "TABLE" 1820 elif self._match(TokenType.VIEW): 1821 kind = "VIEW" 1822 elif self._match(TokenType.ROW): 1823 kind = "ROW" 1824 elif self._match_text_seq("DATABASE"): 1825 kind = "DATABASE" 1826 else: 1827 kind = None 1828 1829 if kind in ("DATABASE", "TABLE", "VIEW"): 1830 this = self._parse_table_parts() 1831 else: 1832 this = None 1833 1834 if self._match(TokenType.FOR): 1835 for_or_in = "FOR" 1836 elif self._match(TokenType.IN): 1837 for_or_in = "IN" 1838 else: 1839 for_or_in = None 1840 1841 if self._match_text_seq("ACCESS"): 1842 lock_type = "ACCESS" 1843 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1844 lock_type = "EXCLUSIVE" 1845 elif self._match_text_seq("SHARE"): 1846 lock_type = "SHARE" 1847 elif self._match_text_seq("READ"): 1848 lock_type = "READ" 1849 elif self._match_text_seq("WRITE"): 1850 lock_type = "WRITE" 1851 elif self._match_text_seq("CHECKSUM"): 1852 lock_type = "CHECKSUM" 1853 else: 1854 lock_type = None 1855 1856 override = self._match_text_seq("OVERRIDE") 1857 1858 return self.expression( 1859 exp.LockingProperty, 1860 this=this, 1861 kind=kind, 1862 for_or_in=for_or_in, 1863 lock_type=lock_type, 1864 override=override, 1865 ) 1866 1867 def _parse_partition_by(self) -> t.List[exp.Expression]: 1868 if self._match(TokenType.PARTITION_BY): 1869 return self._parse_csv(self._parse_conjunction) 1870 return [] 1871 1872 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 1873 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 1874 if self._match_text_seq("MINVALUE"): 1875 return exp.var("MINVALUE") 1876 if self._match_text_seq("MAXVALUE"): 1877 return exp.var("MAXVALUE") 1878 return self._parse_bitwise() 1879 1880 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 1881 expression = None 1882 from_expressions = None 1883 to_expressions = None 1884 1885 if self._match(TokenType.IN): 1886 this = self._parse_wrapped_csv(self._parse_bitwise) 1887 elif self._match(TokenType.FROM): 1888 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1889 self._match_text_seq("TO") 1890 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1891 elif self._match_text_seq("WITH", "(", "MODULUS"): 1892 this = self._parse_number() 1893 self._match_text_seq(",", "REMAINDER") 1894 expression = self._parse_number() 1895 self._match_r_paren() 1896 else: 1897 self.raise_error("Failed to parse partition bound spec.") 1898 1899 return self.expression( 1900 exp.PartitionBoundSpec, 1901 this=this, 1902 expression=expression, 1903 from_expressions=from_expressions, 1904 to_expressions=to_expressions, 1905 ) 1906 1907 # https://www.postgresql.org/docs/current/sql-createtable.html 1908 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 1909 if not self._match_text_seq("OF"): 1910 
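# No OF keyword follows, so this is not a Postgres `PARTITION OF parent` clause: back up one token and return None so other parsers can try.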
self._retreat(self._index - 1) 1911 return None 1912 1913 this = self._parse_table(schema=True) 1914 1915 if self._match(TokenType.DEFAULT): 1916 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 1917 elif self._match_text_seq("FOR", "VALUES"): 1918 expression = self._parse_partition_bound_spec() 1919 else: 1920 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 1921 1922 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 1923 1924 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1925 self._match(TokenType.EQ) 1926 return self.expression( 1927 exp.PartitionedByProperty, 1928 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1929 ) 1930 1931 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1932 if self._match_text_seq("AND", "STATISTICS"): 1933 statistics = True 1934 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1935 statistics = False 1936 else: 1937 statistics = None 1938 1939 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1940 1941 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1942 if self._match_text_seq("SQL"): 1943 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 1944 return None 1945 1946 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1947 if self._match_text_seq("SQL", "DATA"): 1948 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 1949 return None 1950 1951 def _parse_no_property(self) -> t.Optional[exp.Expression]: 1952 if self._match_text_seq("PRIMARY", "INDEX"): 1953 return exp.NoPrimaryIndexProperty() 1954 if self._match_text_seq("SQL"): 1955 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 1956 return None 1957 1958 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1959 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1960 return exp.OnCommitProperty() 1961 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1962 return exp.OnCommitProperty(delete=True) 1963 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1964 1965 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1966 if self._match_text_seq("SQL", "DATA"): 1967 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 1968 return None 1969 1970 def _parse_distkey(self) -> exp.DistKeyProperty: 1971 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1972 1973 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1974 table = self._parse_table(schema=True) 1975 1976 options = [] 1977 while self._match_texts(("INCLUDING", "EXCLUDING")): 1978 this = self._prev.text.upper() 1979 1980 id_var = self._parse_id_var() 1981 if not id_var: 1982 return None 1983 1984 options.append( 1985 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1986 ) 1987 1988 return self.expression(exp.LikeProperty, this=table, expressions=options) 1989 1990 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1991 return self.expression( 1992 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1993 ) 1994 1995 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1996 self._match(TokenType.EQ) 1997 return self.expression( 1998 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1999 ) 2000 2001 def _parse_remote_with_connection(self) 
-> exp.RemoteWithConnectionModelProperty: 2002 self._match_text_seq("WITH", "CONNECTION") 2003 return self.expression( 2004 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2005 ) 2006 2007 def _parse_returns(self) -> exp.ReturnsProperty: 2008 value: t.Optional[exp.Expression] 2009 is_table = self._match(TokenType.TABLE) 2010 2011 if is_table: 2012 if self._match(TokenType.LT): 2013 value = self.expression( 2014 exp.Schema, 2015 this="TABLE", 2016 expressions=self._parse_csv(self._parse_struct_types), 2017 ) 2018 if not self._match(TokenType.GT): 2019 self.raise_error("Expecting >") 2020 else: 2021 value = self._parse_schema(exp.var("TABLE")) 2022 else: 2023 value = self._parse_types() 2024 2025 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 2026 2027 def _parse_describe(self) -> exp.Describe: 2028 kind = self._match_set(self.CREATABLES) and self._prev.text 2029 extended = self._match_text_seq("EXTENDED") 2030 this = self._parse_table(schema=True) 2031 properties = self._parse_properties() 2032 expressions = properties.expressions if properties else None 2033 return self.expression( 2034 exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions 2035 ) 2036 2037 def _parse_insert(self) -> exp.Insert: 2038 comments = ensure_list(self._prev_comments) 2039 overwrite = self._match(TokenType.OVERWRITE) 2040 ignore = self._match(TokenType.IGNORE) 2041 local = self._match_text_seq("LOCAL") 2042 alternative = None 2043 2044 if self._match_text_seq("DIRECTORY"): 2045 this: t.Optional[exp.Expression] = self.expression( 2046 exp.Directory, 2047 this=self._parse_var_or_string(), 2048 local=local, 2049 row_format=self._parse_row_format(match_row=True), 2050 ) 2051 else: 2052 if self._match(TokenType.OR): 2053 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2054 2055 self._match(TokenType.INTO) 2056 comments += ensure_list(self._prev_comments) 2057 self._match(TokenType.TABLE) 2058 this = self._parse_table(schema=True) 2059 2060 returning = self._parse_returning() 2061 2062 return self.expression( 2063 exp.Insert, 2064 comments=comments, 2065 this=this, 2066 by_name=self._match_text_seq("BY", "NAME"), 2067 exists=self._parse_exists(), 2068 partition=self._parse_partition(), 2069 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2070 and self._parse_conjunction(), 2071 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2072 conflict=self._parse_on_conflict(), 2073 returning=returning or self._parse_returning(), 2074 overwrite=overwrite, 2075 alternative=alternative, 2076 ignore=ignore, 2077 ) 2078 2079 def _parse_kill(self) -> exp.Kill: 2080 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2081 2082 return self.expression( 2083 exp.Kill, 2084 this=self._parse_primary(), 2085 kind=kind, 2086 ) 2087 2088 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2089 conflict = self._match_text_seq("ON", "CONFLICT") 2090 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2091 2092 if not conflict and not duplicate: 2093 return None 2094 2095 nothing = None 2096 expressions = None 2097 key = None 2098 constraint = None 2099 2100 if conflict: 2101 if self._match_text_seq("ON", "CONSTRAINT"): 2102 constraint = self._parse_id_var() 2103 else: 2104 key = self._parse_csv(self._parse_value) 2105 2106 self._match_text_seq("DO") 2107 if self._match_text_seq("NOTHING"): 2108 nothing = True 2109 else: 2110 self._match(TokenType.UPDATE) 2111 
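# Both vendor flavors converge on an assignment list at this point, e.g. (illustrative):
#
#   INSERT ... ON CONFLICT (id) DO UPDATE SET x = 1    -- Postgres-style
#   INSERT ... ON DUPLICATE KEY UPDATE x = 1           -- MySQL-style
#
# SET is matched optionally below because the MySQL form has no SET keyword.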
self._match(TokenType.SET) 2112 expressions = self._parse_csv(self._parse_equality) 2113 2114 return self.expression( 2115 exp.OnConflict, 2116 duplicate=duplicate, 2117 expressions=expressions, 2118 nothing=nothing, 2119 key=key, 2120 constraint=constraint, 2121 ) 2122 2123 def _parse_returning(self) -> t.Optional[exp.Returning]: 2124 if not self._match(TokenType.RETURNING): 2125 return None 2126 return self.expression( 2127 exp.Returning, 2128 expressions=self._parse_csv(self._parse_expression), 2129 into=self._match(TokenType.INTO) and self._parse_table_part(), 2130 ) 2131 2132 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2133 if not self._match(TokenType.FORMAT): 2134 return None 2135 return self._parse_row_format() 2136 2137 def _parse_row_format( 2138 self, match_row: bool = False 2139 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2140 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2141 return None 2142 2143 if self._match_text_seq("SERDE"): 2144 this = self._parse_string() 2145 2146 serde_properties = None 2147 if self._match(TokenType.SERDE_PROPERTIES): 2148 serde_properties = self.expression( 2149 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 2150 ) 2151 2152 return self.expression( 2153 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2154 ) 2155 2156 self._match_text_seq("DELIMITED") 2157 2158 kwargs = {} 2159 2160 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2161 kwargs["fields"] = self._parse_string() 2162 if self._match_text_seq("ESCAPED", "BY"): 2163 kwargs["escaped"] = self._parse_string() 2164 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2165 kwargs["collection_items"] = self._parse_string() 2166 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2167 kwargs["map_keys"] = self._parse_string() 2168 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2169 kwargs["lines"] = self._parse_string() 2170 if self._match_text_seq("NULL", "DEFINED", "AS"): 2171 kwargs["null"] = self._parse_string() 2172 2173 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2174 2175 def _parse_load(self) -> exp.LoadData | exp.Command: 2176 if self._match_text_seq("DATA"): 2177 local = self._match_text_seq("LOCAL") 2178 self._match_text_seq("INPATH") 2179 inpath = self._parse_string() 2180 overwrite = self._match(TokenType.OVERWRITE) 2181 self._match_pair(TokenType.INTO, TokenType.TABLE) 2182 2183 return self.expression( 2184 exp.LoadData, 2185 this=self._parse_table(schema=True), 2186 local=local, 2187 overwrite=overwrite, 2188 inpath=inpath, 2189 partition=self._parse_partition(), 2190 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2191 serde=self._match_text_seq("SERDE") and self._parse_string(), 2192 ) 2193 return self._parse_as_command(self._prev) 2194 2195 def _parse_delete(self) -> exp.Delete: 2196 # This handles MySQL's "Multiple-Table Syntax" 2197 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2198 tables = None 2199 comments = self._prev_comments 2200 if not self._match(TokenType.FROM, advance=False): 2201 tables = self._parse_csv(self._parse_table) or None 2202 2203 returning = self._parse_returning() 2204 2205 return self.expression( 2206 exp.Delete, 2207 comments=comments, 2208 tables=tables, 2209 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2210 using=self._match(TokenType.USING) and 
self._parse_table(joins=True), 2211 where=self._parse_where(), 2212 returning=returning or self._parse_returning(), 2213 limit=self._parse_limit(), 2214 ) 2215 2216 def _parse_update(self) -> exp.Update: 2217 comments = self._prev_comments 2218 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2219 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2220 returning = self._parse_returning() 2221 return self.expression( 2222 exp.Update, 2223 comments=comments, 2224 **{ # type: ignore 2225 "this": this, 2226 "expressions": expressions, 2227 "from": self._parse_from(joins=True), 2228 "where": self._parse_where(), 2229 "returning": returning or self._parse_returning(), 2230 "order": self._parse_order(), 2231 "limit": self._parse_limit(), 2232 }, 2233 ) 2234 2235 def _parse_uncache(self) -> exp.Uncache: 2236 if not self._match(TokenType.TABLE): 2237 self.raise_error("Expecting TABLE after UNCACHE") 2238 2239 return self.expression( 2240 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2241 ) 2242 2243 def _parse_cache(self) -> exp.Cache: 2244 lazy = self._match_text_seq("LAZY") 2245 self._match(TokenType.TABLE) 2246 table = self._parse_table(schema=True) 2247 2248 options = [] 2249 if self._match_text_seq("OPTIONS"): 2250 self._match_l_paren() 2251 k = self._parse_string() 2252 self._match(TokenType.EQ) 2253 v = self._parse_string() 2254 options = [k, v] 2255 self._match_r_paren() 2256 2257 self._match(TokenType.ALIAS) 2258 return self.expression( 2259 exp.Cache, 2260 this=table, 2261 lazy=lazy, 2262 options=options, 2263 expression=self._parse_select(nested=True), 2264 ) 2265 2266 def _parse_partition(self) -> t.Optional[exp.Partition]: 2267 if not self._match(TokenType.PARTITION): 2268 return None 2269 2270 return self.expression( 2271 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2272 ) 2273 2274 def _parse_value(self) -> exp.Tuple: 2275 if self._match(TokenType.L_PAREN): 2276 expressions = self._parse_csv(self._parse_expression) 2277 self._match_r_paren() 2278 return self.expression(exp.Tuple, expressions=expressions) 2279 2280 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
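# For instance (illustrative):
#
#   VALUES (1, 'a'), (2, 'b')   -- parenthesized rows are handled by the branch above
#   VALUES 1, 2                 -- bare scalars: each row becomes a one-element Tuple below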
2281 return self.expression(exp.Tuple, expressions=[self._parse_expression()]) 2282 2283 def _parse_projections(self) -> t.List[exp.Expression]: 2284 return self._parse_expressions() 2285 2286 def _parse_select( 2287 self, 2288 nested: bool = False, 2289 table: bool = False, 2290 parse_subquery_alias: bool = True, 2291 parse_set_operation: bool = True, 2292 ) -> t.Optional[exp.Expression]: 2293 cte = self._parse_with() 2294 2295 if cte: 2296 this = self._parse_statement() 2297 2298 if not this: 2299 self.raise_error("Failed to parse any statement following CTE") 2300 return cte 2301 2302 if "with" in this.arg_types: 2303 this.set("with", cte) 2304 else: 2305 self.raise_error(f"{this.key} does not support CTE") 2306 this = cte 2307 2308 return this 2309 2310 # duckdb supports leading with FROM x 2311 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2312 2313 if self._match(TokenType.SELECT): 2314 comments = self._prev_comments 2315 2316 hint = self._parse_hint() 2317 all_ = self._match(TokenType.ALL) 2318 distinct = self._match_set(self.DISTINCT_TOKENS) 2319 2320 kind = ( 2321 self._match(TokenType.ALIAS) 2322 and self._match_texts(("STRUCT", "VALUE")) 2323 and self._prev.text.upper() 2324 ) 2325 2326 if distinct: 2327 distinct = self.expression( 2328 exp.Distinct, 2329 on=self._parse_value() if self._match(TokenType.ON) else None, 2330 ) 2331 2332 if all_ and distinct: 2333 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2334 2335 limit = self._parse_limit(top=True) 2336 projections = self._parse_projections() 2337 2338 this = self.expression( 2339 exp.Select, 2340 kind=kind, 2341 hint=hint, 2342 distinct=distinct, 2343 expressions=projections, 2344 limit=limit, 2345 ) 2346 this.comments = comments 2347 2348 into = self._parse_into() 2349 if into: 2350 this.set("into", into) 2351 2352 if not from_: 2353 from_ = self._parse_from() 2354 2355 if from_: 2356 this.set("from", from_) 2357 2358 this = self._parse_query_modifiers(this) 2359 elif (table or nested) and self._match(TokenType.L_PAREN): 2360 if self._match(TokenType.PIVOT): 2361 this = self._parse_simplified_pivot() 2362 elif self._match(TokenType.FROM): 2363 this = exp.select("*").from_( 2364 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2365 ) 2366 else: 2367 this = ( 2368 self._parse_table() 2369 if table 2370 else self._parse_select(nested=True, parse_set_operation=False) 2371 ) 2372 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2373 2374 self._match_r_paren() 2375 2376 # We return early here so that the UNION isn't attached to the subquery by the 2377 # following call to _parse_set_operations, but instead becomes the parent node 2378 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2379 elif self._match(TokenType.VALUES, advance=False): 2380 this = self._parse_derived_table_values() 2381 elif from_: 2382 this = exp.select("*").from_(from_.this, copy=False) 2383 else: 2384 this = None 2385 2386 if parse_set_operation: 2387 return self._parse_set_operations(this) 2388 return this 2389 2390 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2391 if not skip_with_token and not self._match(TokenType.WITH): 2392 return None 2393 2394 comments = self._prev_comments 2395 recursive = self._match(TokenType.RECURSIVE) 2396 2397 expressions = [] 2398 while True: 2399 expressions.append(self._parse_cte()) 2400 2401 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2402 break 2403 else: 
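# The loop continues when either "," or another WITH is seen; swallowing an extra WITH here also tolerates inputs like `WITH a AS (...), WITH b AS (...)` (illustrative).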
2404 self._match(TokenType.WITH) 2405 2406 return self.expression( 2407 exp.With, comments=comments, expressions=expressions, recursive=recursive 2408 ) 2409 2410 def _parse_cte(self) -> exp.CTE: 2411 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2412 if not alias or not alias.this: 2413 self.raise_error("Expected CTE to have alias") 2414 2415 self._match(TokenType.ALIAS) 2416 return self.expression( 2417 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2418 ) 2419 2420 def _parse_table_alias( 2421 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2422 ) -> t.Optional[exp.TableAlias]: 2423 any_token = self._match(TokenType.ALIAS) 2424 alias = ( 2425 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2426 or self._parse_string_as_identifier() 2427 ) 2428 2429 index = self._index 2430 if self._match(TokenType.L_PAREN): 2431 columns = self._parse_csv(self._parse_function_parameter) 2432 self._match_r_paren() if columns else self._retreat(index) 2433 else: 2434 columns = None 2435 2436 if not alias and not columns: 2437 return None 2438 2439 return self.expression(exp.TableAlias, this=alias, columns=columns) 2440 2441 def _parse_subquery( 2442 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2443 ) -> t.Optional[exp.Subquery]: 2444 if not this: 2445 return None 2446 2447 return self.expression( 2448 exp.Subquery, 2449 this=this, 2450 pivots=self._parse_pivots(), 2451 alias=self._parse_table_alias() if parse_alias else None, 2452 ) 2453 2454 def _parse_query_modifiers( 2455 self, this: t.Optional[exp.Expression] 2456 ) -> t.Optional[exp.Expression]: 2457 if isinstance(this, self.MODIFIABLES): 2458 for join in iter(self._parse_join, None): 2459 this.append("joins", join) 2460 for lateral in iter(self._parse_lateral, None): 2461 this.append("laterals", lateral) 2462 2463 while True: 2464 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2465 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2466 key, expression = parser(self) 2467 2468 if expression: 2469 this.set(key, expression) 2470 if key == "limit": 2471 offset = expression.args.pop("offset", None) 2472 2473 if offset: 2474 offset = exp.Offset(expression=offset) 2475 this.set("offset", offset) 2476 2477 limit_by_expressions = expression.expressions 2478 expression.set("expressions", None) 2479 offset.set("expressions", limit_by_expressions) 2480 continue 2481 break 2482 return this 2483 2484 def _parse_hint(self) -> t.Optional[exp.Hint]: 2485 if self._match(TokenType.HINT): 2486 hints = [] 2487 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2488 hints.extend(hint) 2489 2490 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2491 self.raise_error("Expected */ after HINT") 2492 2493 return self.expression(exp.Hint, expressions=hints) 2494 2495 return None 2496 2497 def _parse_into(self) -> t.Optional[exp.Into]: 2498 if not self._match(TokenType.INTO): 2499 return None 2500 2501 temp = self._match(TokenType.TEMPORARY) 2502 unlogged = self._match_text_seq("UNLOGGED") 2503 self._match(TokenType.TABLE) 2504 2505 return self.expression( 2506 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2507 ) 2508 2509 def _parse_from( 2510 self, joins: bool = False, skip_from_token: bool = False 2511 ) -> t.Optional[exp.From]: 2512 if not skip_from_token and not self._match(TokenType.FROM): 2513 return None 2514 2515 return self.expression( 2516 exp.From, comments=self._prev_comments, 
this=self._parse_table(joins=joins) 2517 ) 2518 2519 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2520 if not self._match(TokenType.MATCH_RECOGNIZE): 2521 return None 2522 2523 self._match_l_paren() 2524 2525 partition = self._parse_partition_by() 2526 order = self._parse_order() 2527 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2528 2529 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2530 rows = exp.var("ONE ROW PER MATCH") 2531 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2532 text = "ALL ROWS PER MATCH" 2533 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2534 text += " SHOW EMPTY MATCHES" 2535 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2536 text += " OMIT EMPTY MATCHES" 2537 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2538 text += " WITH UNMATCHED ROWS" 2539 rows = exp.var(text) 2540 else: 2541 rows = None 2542 2543 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2544 text = "AFTER MATCH SKIP" 2545 if self._match_text_seq("PAST", "LAST", "ROW"): 2546 text += " PAST LAST ROW" 2547 elif self._match_text_seq("TO", "NEXT", "ROW"): 2548 text += " TO NEXT ROW" 2549 elif self._match_text_seq("TO", "FIRST"): 2550 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2551 elif self._match_text_seq("TO", "LAST"): 2552 text += f" TO LAST {self._advance_any().text}" # type: ignore 2553 after = exp.var(text) 2554 else: 2555 after = None 2556 2557 if self._match_text_seq("PATTERN"): 2558 self._match_l_paren() 2559 2560 if not self._curr: 2561 self.raise_error("Expecting )", self._curr) 2562 2563 paren = 1 2564 start = self._curr 2565 2566 while self._curr and paren > 0: 2567 if self._curr.token_type == TokenType.L_PAREN: 2568 paren += 1 2569 if self._curr.token_type == TokenType.R_PAREN: 2570 paren -= 1 2571 2572 end = self._prev 2573 self._advance() 2574 2575 if paren > 0: 2576 self.raise_error("Expecting )", self._curr) 2577 2578 pattern = exp.var(self._find_sql(start, end)) 2579 else: 2580 pattern = None 2581 2582 define = ( 2583 self._parse_csv(self._parse_name_as_expression) 2584 if self._match_text_seq("DEFINE") 2585 else None 2586 ) 2587 2588 self._match_r_paren() 2589 2590 return self.expression( 2591 exp.MatchRecognize, 2592 partition_by=partition, 2593 order=order, 2594 measures=measures, 2595 rows=rows, 2596 after=after, 2597 pattern=pattern, 2598 define=define, 2599 alias=self._parse_table_alias(), 2600 ) 2601 2602 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2603 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2604 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2605 cross_apply = False 2606 2607 if cross_apply is not None: 2608 this = self._parse_select(table=True) 2609 view = None 2610 outer = None 2611 elif self._match(TokenType.LATERAL): 2612 this = self._parse_select(table=True) 2613 view = self._match(TokenType.VIEW) 2614 outer = self._match(TokenType.OUTER) 2615 else: 2616 return None 2617 2618 if not this: 2619 this = ( 2620 self._parse_unnest() 2621 or self._parse_function() 2622 or self._parse_id_var(any_token=False) 2623 ) 2624 2625 while self._match(TokenType.DOT): 2626 this = exp.Dot( 2627 this=this, 2628 expression=self._parse_function() or self._parse_id_var(any_token=False), 2629 ) 2630 2631 if view: 2632 table = self._parse_id_var(any_token=False) 2633 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2634 table_alias: t.Optional[exp.TableAlias] = 
self.expression( 2635 exp.TableAlias, this=table, columns=columns 2636 ) 2637 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2638 # We move the alias from the lateral's child node to the lateral itself 2639 table_alias = this.args["alias"].pop() 2640 else: 2641 table_alias = self._parse_table_alias() 2642 2643 return self.expression( 2644 exp.Lateral, 2645 this=this, 2646 view=view, 2647 outer=outer, 2648 alias=table_alias, 2649 cross_apply=cross_apply, 2650 ) 2651 2652 def _parse_join_parts( 2653 self, 2654 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2655 return ( 2656 self._match_set(self.JOIN_METHODS) and self._prev, 2657 self._match_set(self.JOIN_SIDES) and self._prev, 2658 self._match_set(self.JOIN_KINDS) and self._prev, 2659 ) 2660 2661 def _parse_join( 2662 self, skip_join_token: bool = False, parse_bracket: bool = False 2663 ) -> t.Optional[exp.Join]: 2664 if self._match(TokenType.COMMA): 2665 return self.expression(exp.Join, this=self._parse_table()) 2666 2667 index = self._index 2668 method, side, kind = self._parse_join_parts() 2669 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2670 join = self._match(TokenType.JOIN) 2671 2672 if not skip_join_token and not join: 2673 self._retreat(index) 2674 kind = None 2675 method = None 2676 side = None 2677 2678 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2679 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2680 2681 if not skip_join_token and not join and not outer_apply and not cross_apply: 2682 return None 2683 2684 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2685 2686 if method: 2687 kwargs["method"] = method.text 2688 if side: 2689 kwargs["side"] = side.text 2690 if kind: 2691 kwargs["kind"] = kind.text 2692 if hint: 2693 kwargs["hint"] = hint 2694 2695 if self._match(TokenType.ON): 2696 kwargs["on"] = self._parse_conjunction() 2697 elif self._match(TokenType.USING): 2698 kwargs["using"] = self._parse_wrapped_id_vars() 2699 elif not (kind and kind.token_type == TokenType.CROSS): 2700 index = self._index 2701 join = self._parse_join() 2702 2703 if join and self._match(TokenType.ON): 2704 kwargs["on"] = self._parse_conjunction() 2705 elif join and self._match(TokenType.USING): 2706 kwargs["using"] = self._parse_wrapped_id_vars() 2707 else: 2708 join = None 2709 self._retreat(index) 2710 2711 kwargs["this"].set("joins", [join] if join else None) 2712 2713 comments = [c for token in (method, side, kind) if token for c in token.comments] 2714 return self.expression(exp.Join, comments=comments, **kwargs) 2715 2716 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2717 this = self._parse_conjunction() 2718 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2719 return this 2720 2721 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2722 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2723 2724 return this 2725 2726 def _parse_index( 2727 self, 2728 index: t.Optional[exp.Expression] = None, 2729 ) -> t.Optional[exp.Index]: 2730 if index: 2731 unique = None 2732 primary = None 2733 amp = None 2734 2735 self._match(TokenType.ON) 2736 self._match(TokenType.TABLE) # hive 2737 table = self._parse_table_parts(schema=True) 2738 else: 2739 unique = self._match(TokenType.UNIQUE) 2740 primary = self._match_text_seq("PRIMARY") 2741 amp = self._match_text_seq("AMP") 2742 2743 if not self._match(TokenType.INDEX): 2744 
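# UNIQUE / PRIMARY / AMP were not followed by the INDEX keyword, so this is not an index definition after all.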
return None 2745 2746 index = self._parse_id_var() 2747 table = None 2748 2749 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2750 2751 if self._match(TokenType.L_PAREN, advance=False): 2752 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2753 else: 2754 columns = None 2755 2756 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 2757 2758 return self.expression( 2759 exp.Index, 2760 this=index, 2761 table=table, 2762 using=using, 2763 columns=columns, 2764 unique=unique, 2765 primary=primary, 2766 amp=amp, 2767 include=include, 2768 partition_by=self._parse_partition_by(), 2769 where=self._parse_where(), 2770 ) 2771 2772 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2773 hints: t.List[exp.Expression] = [] 2774 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2775 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2776 hints.append( 2777 self.expression( 2778 exp.WithTableHint, 2779 expressions=self._parse_csv( 2780 lambda: self._parse_function() or self._parse_var(any_token=True) 2781 ), 2782 ) 2783 ) 2784 self._match_r_paren() 2785 else: 2786 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2787 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2788 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2789 2790 self._match_texts(("INDEX", "KEY")) 2791 if self._match(TokenType.FOR): 2792 hint.set("target", self._advance_any() and self._prev.text.upper()) 2793 2794 hint.set("expressions", self._parse_wrapped_id_vars()) 2795 hints.append(hint) 2796 2797 return hints or None 2798 2799 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2800 return ( 2801 (not schema and self._parse_function(optional_parens=False)) 2802 or self._parse_id_var(any_token=False) 2803 or self._parse_string_as_identifier() 2804 or self._parse_placeholder() 2805 ) 2806 2807 def _parse_table_parts(self, schema: bool = False, is_db_reference: bool = False) -> exp.Table: 2808 catalog = None 2809 db = None 2810 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 2811 2812 while self._match(TokenType.DOT): 2813 if catalog: 2814 # This allows nesting the table in arbitrarily many dot expressions if needed 2815 table = self.expression( 2816 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2817 ) 2818 else: 2819 catalog = db 2820 db = table 2821 table = self._parse_table_part(schema=schema) or "" 2822 2823 if is_db_reference: 2824 catalog = db 2825 db = table 2826 table = None 2827 2828 if not table and not is_db_reference: 2829 self.raise_error(f"Expected table name but got {self._curr}") 2830 if not db and is_db_reference: 2831 self.raise_error(f"Expected database name but got {self._curr}") 2832 2833 return self.expression( 2834 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2835 ) 2836 2837 def _parse_table( 2838 self, 2839 schema: bool = False, 2840 joins: bool = False, 2841 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2842 parse_bracket: bool = False, 2843 is_db_reference: bool = False, 2844 ) -> t.Optional[exp.Expression]: 2845 lateral = self._parse_lateral() 2846 if lateral: 2847 return lateral 2848 2849 unnest = self._parse_unnest() 2850 if unnest: 2851 return unnest 2852 2853 values = self._parse_derived_table_values() 2854 if values: 2855 return values 2856 2857 subquery = 
self._parse_select(table=True) 2858 if subquery: 2859 if not subquery.args.get("pivots"): 2860 subquery.set("pivots", self._parse_pivots()) 2861 return subquery 2862 2863 bracket = parse_bracket and self._parse_bracket(None) 2864 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2865 this = t.cast( 2866 exp.Expression, 2867 bracket 2868 or self._parse_bracket( 2869 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 2870 ), 2871 ) 2872 2873 if schema: 2874 return self._parse_schema(this=this) 2875 2876 version = self._parse_version() 2877 2878 if version: 2879 this.set("version", version) 2880 2881 if self.dialect.ALIAS_POST_TABLESAMPLE: 2882 table_sample = self._parse_table_sample() 2883 2884 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2885 if alias: 2886 this.set("alias", alias) 2887 2888 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 2889 return self.expression( 2890 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 2891 ) 2892 2893 this.set("hints", self._parse_table_hints()) 2894 2895 if not this.args.get("pivots"): 2896 this.set("pivots", self._parse_pivots()) 2897 2898 if not self.dialect.ALIAS_POST_TABLESAMPLE: 2899 table_sample = self._parse_table_sample() 2900 2901 if table_sample: 2902 table_sample.set("this", this) 2903 this = table_sample 2904 2905 if joins: 2906 for join in iter(self._parse_join, None): 2907 this.append("joins", join) 2908 2909 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 2910 this.set("ordinality", True) 2911 this.set("alias", self._parse_table_alias()) 2912 2913 return this 2914 2915 def _parse_version(self) -> t.Optional[exp.Version]: 2916 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2917 this = "TIMESTAMP" 2918 elif self._match(TokenType.VERSION_SNAPSHOT): 2919 this = "VERSION" 2920 else: 2921 return None 2922 2923 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2924 kind = self._prev.text.upper() 2925 start = self._parse_bitwise() 2926 self._match_texts(("TO", "AND")) 2927 end = self._parse_bitwise() 2928 expression: t.Optional[exp.Expression] = self.expression( 2929 exp.Tuple, expressions=[start, end] 2930 ) 2931 elif self._match_text_seq("CONTAINED", "IN"): 2932 kind = "CONTAINED IN" 2933 expression = self.expression( 2934 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2935 ) 2936 elif self._match(TokenType.ALL): 2937 kind = "ALL" 2938 expression = None 2939 else: 2940 self._match_text_seq("AS", "OF") 2941 kind = "AS OF" 2942 expression = self._parse_type() 2943 2944 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2945 2946 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2947 if not self._match(TokenType.UNNEST): 2948 return None 2949 2950 expressions = self._parse_wrapped_csv(self._parse_equality) 2951 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2952 2953 alias = self._parse_table_alias() if with_alias else None 2954 2955 if alias: 2956 if self.dialect.UNNEST_COLUMN_ONLY: 2957 if alias.args.get("columns"): 2958 self.raise_error("Unexpected extra column alias in unnest.") 2959 2960 alias.set("columns", [alias.this]) 2961 alias.set("this", None) 2962 2963 columns = alias.args.get("columns") or [] 2964 if offset and len(expressions) < len(columns): 2965 offset = columns.pop() 2966 2967 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2968 self._match(TokenType.ALIAS) 2969 offset = 
    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match_text_seq("VALUES"):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )
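
    # Editor's sketch (illustrative): _parse_table_sample picks "percent" vs "size"
    # from the PERCENT/ROWS keywords and the dialect's TABLESAMPLE_SIZE_IS_PERCENT
    # flag. Roughly, for the default dialect:
    #
    #     import sqlglot
    #     sql = "SELECT * FROM t TABLESAMPLE (10 PERCENT)"
    #     sample = sqlglot.parse_one(sql).find(sqlglot.exp.TableSample)
    #     # sample.args["percent"] holds the literal 10; "size" stays unset
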
    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_conjunction()

            self._match(TokenType.ALIAS)
            alias = self._parse_field()
            if alias:
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        aliased_expressions = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=aliased_expressions)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )
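
    # Editor's sketch (illustrative): _parse_pivot consumes the aggregation list and
    # _parse_pivot_in the FOR ... IN (...) field, e.g. with the Snowflake reader:
    #
    #     import sqlglot
    #     sql = "SELECT * FROM t PIVOT (SUM(v) FOR k IN ('a', 'b'))"
    #     pivot = sqlglot.parse_one(sql, read="snowflake").find(sqlglot.exp.Pivot)
    #     # pivot.args["field"] is an exp.In over the literals 'a' and 'b'
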
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_name_as_expression(self) -> exp.Alias:
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )
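
    # Editor's sketch (illustrative): GROUP BY ALL short-circuits before the clause
    # loop, while ROLLUP/CUBE/GROUPING SETS accumulate into `elements`:
    #
    #     import sqlglot
    #     group = sqlglot.parse_one("SELECT a FROM t GROUP BY ALL").find(sqlglot.exp.Group)
    #     assert group.args.get("all") is True
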
    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered:
        this = parse_method() if parse_method else self._parse_conjunction()

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)
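
    # Editor's sketch (illustrative): LIMIT/TOP and ANSI FETCH both funnel through
    # _parse_limit, but FETCH yields exp.Fetch rather than exp.Limit:
    #
    #     import sqlglot
    #     sql = "SELECT a FROM t FETCH FIRST 5 ROWS ONLY"
    #     fetch = sqlglot.parse_one(sql).find(sqlglot.exp.Fetch)
    #     # fetch.args["direction"] == "FIRST"; fetch.args["count"] is the literal 5
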
    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            expression = this.expression

            if expression:
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_conjunction())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())
    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1].upper())

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            this = self.expression(
                self.FACTOR[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )
            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())
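
    # Editor's sketch (illustrative): _parse_interval canonicalizes the single-string
    # form into separate value and unit, per the comment above:
    #
    #     import sqlglot
    #     iv = sqlglot.parse_one("SELECT INTERVAL '5 day'").find(sqlglot.exp.Interval)
    #     # iv.this is Literal.string("5"); iv.args["unit"] is Var(this="DAY")
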
    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
            while True:
                index = self._index
                self._match(TokenType.PLUS)

                if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
                    self._retreat(index)
                    break

                interval = self.expression(  # type: ignore
                    exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
                )

            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if identifier:
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        index = self._index
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        column_def = self._parse_column_def(this)

        if type_required and (
            (isinstance(this, exp.Column) and this.this is column_def) or this is column_def
        ):
            self._retreat(index)
            return self._parse_types()

        return column_def
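
    # Editor's sketch (illustrative): _parse_types recurses for nested types, so an
    # angle-bracketed element type becomes a child DataType:
    #
    #     import sqlglot
    #     cast = sqlglot.parse_one("SELECT CAST(x AS ARRAY<INT>)").find(sqlglot.exp.Cast)
    #     # cast.to is DataType(this=Type.ARRAY, expressions=[DataType INT], nested=True)
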
    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_column_reference()
        return self._parse_column_ops(this) if this else self._parse_bracket(this)

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this

                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())
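
    # Editor's sketch (illustrative): the ODBC {fn ...} wrapper is stripped by
    # _parse_function, leaving an ordinary function node:
    #
    #     import sqlglot
    #     node = sqlglot.parse_one("SELECT {fn CONCAT('a', 'b')}")
    #     assert node.find(sqlglot.exp.Concat) is not None
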
    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))
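
    # Editor's sketch (illustrative): _parse_lambda turns `x -> expr` arguments into
    # exp.Lambda; MY_TRANSFORM is a made-up name that parses as exp.Anonymous:
    #
    #     import sqlglot
    #     sql = "SELECT MY_TRANSFORM(xs, x -> x + 1)"
    #     lam = sqlglot.parse_one(sql).find(sqlglot.exp.Lambda)
    #     # lam.expressions == [Identifier x]; lam.this is the body x + 1
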
    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
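
    # Editor's sketch (illustrative): _parse_extract accepts the ANSI FROM form and
    # a comma-separated variant, producing the same node either way:
    #
    #     import sqlglot
    #     ext = sqlglot.parse_one("SELECT EXTRACT(YEAR FROM d)").find(sqlglot.exp.Extract)
    #     # ext.this is (roughly) a Var for YEAR; ext.expression is the column d
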
    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
        )

    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)
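
    # Editor's sketch (illustrative): without WITHIN GROUP, STRING_AGG falls back to
    # a plain exp.GroupConcat built from its argument list:
    #
    #     import sqlglot
    #     sql = "SELECT STRING_AGG(x, ',') FROM t"
    #     agg = sqlglot.parse_one(sql).find(sqlglot.exp.GroupConcat)
    #     # agg.this is the column x; agg.args["separator"] is the literal ','
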
Note that NULL 4686 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4687 instead of relying on pattern matching. 4688 """ 4689 args = self._parse_csv(self._parse_conjunction) 4690 4691 if len(args) < 3: 4692 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4693 4694 expression, *expressions = args 4695 if not expression: 4696 return None 4697 4698 ifs = [] 4699 for search, result in zip(expressions[::2], expressions[1::2]): 4700 if not search or not result: 4701 return None 4702 4703 if isinstance(search, exp.Literal): 4704 ifs.append( 4705 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4706 ) 4707 elif isinstance(search, exp.Null): 4708 ifs.append( 4709 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4710 ) 4711 else: 4712 cond = exp.or_( 4713 exp.EQ(this=expression.copy(), expression=search), 4714 exp.and_( 4715 exp.Is(this=expression.copy(), expression=exp.Null()), 4716 exp.Is(this=search.copy(), expression=exp.Null()), 4717 copy=False, 4718 ), 4719 copy=False, 4720 ) 4721 ifs.append(exp.If(this=cond, true=result)) 4722 4723 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4724 4725 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4726 self._match_text_seq("KEY") 4727 key = self._parse_column() 4728 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 4729 self._match_text_seq("VALUE") 4730 value = self._parse_bitwise() 4731 4732 if not key and not value: 4733 return None 4734 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4735 4736 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4737 if not this or not self._match_text_seq("FORMAT", "JSON"): 4738 return this 4739 4740 return self.expression(exp.FormatJson, this=this) 4741 4742 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4743 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 4744 for value in values: 4745 if self._match_text_seq(value, "ON", on): 4746 return f"{value} ON {on}" 4747 4748 return None 4749 4750 @t.overload 4751 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: 4752 ... 4753 4754 @t.overload 4755 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: 4756 ... 
4757 4758 def _parse_json_object(self, agg=False): 4759 star = self._parse_star() 4760 expressions = ( 4761 [star] 4762 if star 4763 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4764 ) 4765 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4766 4767 unique_keys = None 4768 if self._match_text_seq("WITH", "UNIQUE"): 4769 unique_keys = True 4770 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4771 unique_keys = False 4772 4773 self._match_text_seq("KEYS") 4774 4775 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4776 self._parse_type() 4777 ) 4778 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4779 4780 return self.expression( 4781 exp.JSONObjectAgg if agg else exp.JSONObject, 4782 expressions=expressions, 4783 null_handling=null_handling, 4784 unique_keys=unique_keys, 4785 return_type=return_type, 4786 encoding=encoding, 4787 ) 4788 4789 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4790 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4791 if not self._match_text_seq("NESTED"): 4792 this = self._parse_id_var() 4793 kind = self._parse_types(allow_identifiers=False) 4794 nested = None 4795 else: 4796 this = None 4797 kind = None 4798 nested = True 4799 4800 path = self._match_text_seq("PATH") and self._parse_string() 4801 nested_schema = nested and self._parse_json_schema() 4802 4803 return self.expression( 4804 exp.JSONColumnDef, 4805 this=this, 4806 kind=kind, 4807 path=path, 4808 nested_schema=nested_schema, 4809 ) 4810 4811 def _parse_json_schema(self) -> exp.JSONSchema: 4812 self._match_text_seq("COLUMNS") 4813 return self.expression( 4814 exp.JSONSchema, 4815 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 4816 ) 4817 4818 def _parse_json_table(self) -> exp.JSONTable: 4819 this = self._parse_format_json(self._parse_bitwise()) 4820 path = self._match(TokenType.COMMA) and self._parse_string() 4821 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 4822 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 4823 schema = self._parse_json_schema() 4824 4825 return exp.JSONTable( 4826 this=this, 4827 schema=schema, 4828 path=path, 4829 error_handling=error_handling, 4830 empty_handling=empty_handling, 4831 ) 4832 4833 def _parse_match_against(self) -> exp.MatchAgainst: 4834 expressions = self._parse_csv(self._parse_column) 4835 4836 self._match_text_seq(")", "AGAINST", "(") 4837 4838 this = self._parse_string() 4839 4840 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4841 modifier = "IN NATURAL LANGUAGE MODE" 4842 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4843 modifier = f"{modifier} WITH QUERY EXPANSION" 4844 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4845 modifier = "IN BOOLEAN MODE" 4846 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4847 modifier = "WITH QUERY EXPANSION" 4848 else: 4849 modifier = None 4850 4851 return self.expression( 4852 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4853 ) 4854 4855 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4856 def _parse_open_json(self) -> exp.OpenJSON: 4857 this = self._parse_bitwise() 4858 path = self._match(TokenType.COMMA) and self._parse_string() 4859 4860 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4861 this = self._parse_field(any_token=True) 4862 kind = self._parse_types() 4863 path = 
self._parse_string() 4864 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4865 4866 return self.expression( 4867 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4868 ) 4869 4870 expressions = None 4871 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4872 self._match_l_paren() 4873 expressions = self._parse_csv(_parse_open_json_column_def) 4874 4875 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4876 4877 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4878 args = self._parse_csv(self._parse_bitwise) 4879 4880 if self._match(TokenType.IN): 4881 return self.expression( 4882 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4883 ) 4884 4885 if haystack_first: 4886 haystack = seq_get(args, 0) 4887 needle = seq_get(args, 1) 4888 else: 4889 needle = seq_get(args, 0) 4890 haystack = seq_get(args, 1) 4891 4892 return self.expression( 4893 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4894 ) 4895 4896 def _parse_predict(self) -> exp.Predict: 4897 self._match_text_seq("MODEL") 4898 this = self._parse_table() 4899 4900 self._match(TokenType.COMMA) 4901 self._match_text_seq("TABLE") 4902 4903 return self.expression( 4904 exp.Predict, 4905 this=this, 4906 expression=self._parse_table(), 4907 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 4908 ) 4909 4910 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4911 args = self._parse_csv(self._parse_table) 4912 return exp.JoinHint(this=func_name.upper(), expressions=args) 4913 4914 def _parse_substring(self) -> exp.Substring: 4915 # Postgres supports the form: substring(string [from int] [for int]) 4916 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4917 4918 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4919 4920 if self._match(TokenType.FROM): 4921 args.append(self._parse_bitwise()) 4922 if self._match(TokenType.FOR): 4923 args.append(self._parse_bitwise()) 4924 4925 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4926 4927 def _parse_trim(self) -> exp.Trim: 4928 # https://www.w3resource.com/sql/character-functions/trim.php 4929 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4930 4931 position = None 4932 collation = None 4933 expression = None 4934 4935 if self._match_texts(self.TRIM_TYPES): 4936 position = self._prev.text.upper() 4937 4938 this = self._parse_bitwise() 4939 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4940 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 4941 expression = self._parse_bitwise() 4942 4943 if invert_order: 4944 this, expression = expression, this 4945 4946 if self._match(TokenType.COLLATE): 4947 collation = self._parse_bitwise() 4948 4949 return self.expression( 4950 exp.Trim, this=this, position=position, expression=expression, collation=collation 4951 ) 4952 4953 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4954 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4955 4956 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4957 return self._parse_window(self._parse_id_var(), alias=True) 4958 4959 def _parse_respect_or_ignore_nulls( 4960 self, this: t.Optional[exp.Expression] 4961 ) -> t.Optional[exp.Expression]: 4962 if self._match_text_seq("IGNORE", "NULLS"): 4963 return self.expression(exp.IgnoreNulls, this=this) 4964 if 
self._match_text_seq("RESPECT", "NULLS"): 4965 return self.expression(exp.RespectNulls, this=this) 4966 return this 4967 4968 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4969 if self._match(TokenType.HAVING): 4970 self._match_texts(("MAX", "MIN")) 4971 max = self._prev.text.upper() != "MIN" 4972 return self.expression( 4973 exp.HavingMax, this=this, expression=self._parse_column(), max=max 4974 ) 4975 4976 return this 4977 4978 def _parse_window( 4979 self, this: t.Optional[exp.Expression], alias: bool = False 4980 ) -> t.Optional[exp.Expression]: 4981 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4982 self._match(TokenType.WHERE) 4983 this = self.expression( 4984 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4985 ) 4986 self._match_r_paren() 4987 4988 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4989 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4990 if self._match_text_seq("WITHIN", "GROUP"): 4991 order = self._parse_wrapped(self._parse_order) 4992 this = self.expression(exp.WithinGroup, this=this, expression=order) 4993 4994 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4995 # Some dialects choose to implement and some do not. 4996 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4997 4998 # There is some code above in _parse_lambda that handles 4999 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5000 5001 # The below changes handle 5002 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5003 5004 # Oracle allows both formats 5005 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5006 # and Snowflake chose to do the same for familiarity 5007 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5008 if isinstance(this, exp.AggFunc): 5009 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5010 5011 if ignore_respect and ignore_respect is not this: 5012 ignore_respect.replace(ignore_respect.this) 5013 this = self.expression(ignore_respect.__class__, this=this) 5014 5015 this = self._parse_respect_or_ignore_nulls(this) 5016 5017 # bigquery select from window x AS (partition by ...) 
5018 if alias: 5019 over = None 5020 self._match(TokenType.ALIAS) 5021 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5022 return this 5023 else: 5024 over = self._prev.text.upper() 5025 5026 if not self._match(TokenType.L_PAREN): 5027 return self.expression( 5028 exp.Window, this=this, alias=self._parse_id_var(False), over=over 5029 ) 5030 5031 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5032 5033 first = self._match(TokenType.FIRST) 5034 if self._match_text_seq("LAST"): 5035 first = False 5036 5037 partition, order = self._parse_partition_and_order() 5038 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5039 5040 if kind: 5041 self._match(TokenType.BETWEEN) 5042 start = self._parse_window_spec() 5043 self._match(TokenType.AND) 5044 end = self._parse_window_spec() 5045 5046 spec = self.expression( 5047 exp.WindowSpec, 5048 kind=kind, 5049 start=start["value"], 5050 start_side=start["side"], 5051 end=end["value"], 5052 end_side=end["side"], 5053 ) 5054 else: 5055 spec = None 5056 5057 self._match_r_paren() 5058 5059 window = self.expression( 5060 exp.Window, 5061 this=this, 5062 partition_by=partition, 5063 order=order, 5064 spec=spec, 5065 alias=window_alias, 5066 over=over, 5067 first=first, 5068 ) 5069 5070 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5071 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5072 return self._parse_window(window, alias=alias) 5073 5074 return window 5075 5076 def _parse_partition_and_order( 5077 self, 5078 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5079 return self._parse_partition_by(), self._parse_order() 5080 5081 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5082 self._match(TokenType.BETWEEN) 5083 5084 return { 5085 "value": ( 5086 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5087 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5088 or self._parse_bitwise() 5089 ), 5090 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5091 } 5092 5093 def _parse_alias( 5094 self, this: t.Optional[exp.Expression], explicit: bool = False 5095 ) -> t.Optional[exp.Expression]: 5096 any_token = self._match(TokenType.ALIAS) 5097 comments = self._prev_comments 5098 5099 if explicit and not any_token: 5100 return this 5101 5102 if self._match(TokenType.L_PAREN): 5103 aliases = self.expression( 5104 exp.Aliases, 5105 comments=comments, 5106 this=this, 5107 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5108 ) 5109 self._match_r_paren(aliases) 5110 return aliases 5111 5112 alias = self._parse_id_var(any_token) or ( 5113 self.STRING_ALIASES and self._parse_string_as_identifier() 5114 ) 5115 5116 if alias: 5117 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5118 column = this.this 5119 5120 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5121 if not this.comments and column and column.comments: 5122 this.comments = column.comments 5123 column.comments = None 5124 5125 return this 5126 5127 def _parse_id_var( 5128 self, 5129 any_token: bool = True, 5130 tokens: t.Optional[t.Collection[TokenType]] = None, 5131 ) -> t.Optional[exp.Expression]: 5132 identifier = self._parse_identifier() 5133 5134 if identifier: 5135 return identifier 5136 5137 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 5138 quoted = self._prev.token_type == TokenType.STRING 5139 
return exp.Identifier(this=self._prev.text, quoted=quoted) 5140 5141 return None 5142 5143 def _parse_string(self) -> t.Optional[exp.Expression]: 5144 if self._match_set((TokenType.STRING, TokenType.RAW_STRING)): 5145 return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev) 5146 return self._parse_placeholder() 5147 5148 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5149 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5150 5151 def _parse_number(self) -> t.Optional[exp.Expression]: 5152 if self._match(TokenType.NUMBER): 5153 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 5154 return self._parse_placeholder() 5155 5156 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5157 if self._match(TokenType.IDENTIFIER): 5158 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5159 return self._parse_placeholder() 5160 5161 def _parse_var( 5162 self, 5163 any_token: bool = False, 5164 tokens: t.Optional[t.Collection[TokenType]] = None, 5165 upper: bool = False, 5166 ) -> t.Optional[exp.Expression]: 5167 if ( 5168 (any_token and self._advance_any()) 5169 or self._match(TokenType.VAR) 5170 or (self._match_set(tokens) if tokens else False) 5171 ): 5172 return self.expression( 5173 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5174 ) 5175 return self._parse_placeholder() 5176 5177 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5178 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5179 self._advance() 5180 return self._prev 5181 return None 5182 5183 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5184 return self._parse_var() or self._parse_string() 5185 5186 def _parse_null(self) -> t.Optional[exp.Expression]: 5187 if self._match_set(self.NULL_TOKENS): 5188 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5189 return self._parse_placeholder() 5190 5191 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5192 if self._match(TokenType.TRUE): 5193 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5194 if self._match(TokenType.FALSE): 5195 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5196 return self._parse_placeholder() 5197 5198 def _parse_star(self) -> t.Optional[exp.Expression]: 5199 if self._match(TokenType.STAR): 5200 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5201 return self._parse_placeholder() 5202 5203 def _parse_parameter(self) -> exp.Parameter: 5204 def _parse_parameter_part() -> t.Optional[exp.Expression]: 5205 return ( 5206 self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True) 5207 ) 5208 5209 self._match(TokenType.L_BRACE) 5210 this = _parse_parameter_part() 5211 expression = self._match(TokenType.COLON) and _parse_parameter_part() 5212 self._match(TokenType.R_BRACE) 5213 5214 return self.expression(exp.Parameter, this=this, expression=expression) 5215 5216 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5217 if self._match_set(self.PLACEHOLDER_PARSERS): 5218 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5219 if placeholder: 5220 return placeholder 5221 self._advance(-1) 5222 return None 5223 5224 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5225 if not self._match(TokenType.EXCEPT): 5226 return None 5227 if self._match(TokenType.L_PAREN, advance=False): 5228 return self._parse_wrapped_csv(self._parse_column) 5229 5230 
except_column = self._parse_column() 5231 return [except_column] if except_column else None 5232 5233 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5234 if not self._match(TokenType.REPLACE): 5235 return None 5236 if self._match(TokenType.L_PAREN, advance=False): 5237 return self._parse_wrapped_csv(self._parse_expression) 5238 5239 replace_expression = self._parse_expression() 5240 return [replace_expression] if replace_expression else None 5241 5242 def _parse_csv( 5243 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5244 ) -> t.List[exp.Expression]: 5245 parse_result = parse_method() 5246 items = [parse_result] if parse_result is not None else [] 5247 5248 while self._match(sep): 5249 self._add_comments(parse_result) 5250 parse_result = parse_method() 5251 if parse_result is not None: 5252 items.append(parse_result) 5253 5254 return items 5255 5256 def _parse_tokens( 5257 self, parse_method: t.Callable, expressions: t.Dict 5258 ) -> t.Optional[exp.Expression]: 5259 this = parse_method() 5260 5261 while self._match_set(expressions): 5262 this = self.expression( 5263 expressions[self._prev.token_type], 5264 this=this, 5265 comments=self._prev_comments, 5266 expression=parse_method(), 5267 ) 5268 5269 return this 5270 5271 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5272 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5273 5274 def _parse_wrapped_csv( 5275 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5276 ) -> t.List[exp.Expression]: 5277 return self._parse_wrapped( 5278 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5279 ) 5280 5281 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5282 wrapped = self._match(TokenType.L_PAREN) 5283 if not wrapped and not optional: 5284 self.raise_error("Expecting (") 5285 parse_result = parse_method() 5286 if wrapped: 5287 self._match_r_paren() 5288 return parse_result 5289 5290 def _parse_expressions(self) -> t.List[exp.Expression]: 5291 return self._parse_csv(self._parse_expression) 5292 5293 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5294 return self._parse_select() or self._parse_set_operations( 5295 self._parse_expression() if alias else self._parse_conjunction() 5296 ) 5297 5298 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5299 return self._parse_query_modifiers( 5300 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5301 ) 5302 5303 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5304 this = None 5305 if self._match_texts(self.TRANSACTION_KIND): 5306 this = self._prev.text 5307 5308 self._match_texts(("TRANSACTION", "WORK")) 5309 5310 modes = [] 5311 while True: 5312 mode = [] 5313 while self._match(TokenType.VAR): 5314 mode.append(self._prev.text) 5315 5316 if mode: 5317 modes.append(" ".join(mode)) 5318 if not self._match(TokenType.COMMA): 5319 break 5320 5321 return self.expression(exp.Transaction, this=this, modes=modes) 5322 5323 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5324 chain = None 5325 savepoint = None 5326 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5327 5328 self._match_texts(("TRANSACTION", "WORK")) 5329 5330 if self._match_text_seq("TO"): 5331 self._match_text_seq("SAVEPOINT") 5332 savepoint = self._parse_id_var() 5333 5334 if self._match(TokenType.AND): 5335 chain = not 
self._match_text_seq("NO") 5336 self._match_text_seq("CHAIN") 5337 5338 if is_rollback: 5339 return self.expression(exp.Rollback, savepoint=savepoint) 5340 5341 return self.expression(exp.Commit, chain=chain) 5342 5343 def _parse_refresh(self) -> exp.Refresh: 5344 self._match(TokenType.TABLE) 5345 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5346 5347 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5348 if not self._match_text_seq("ADD"): 5349 return None 5350 5351 self._match(TokenType.COLUMN) 5352 exists_column = self._parse_exists(not_=True) 5353 expression = self._parse_field_def() 5354 5355 if expression: 5356 expression.set("exists", exists_column) 5357 5358 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5359 if self._match_texts(("FIRST", "AFTER")): 5360 position = self._prev.text 5361 column_position = self.expression( 5362 exp.ColumnPosition, this=self._parse_column(), position=position 5363 ) 5364 expression.set("position", column_position) 5365 5366 return expression 5367 5368 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5369 drop = self._match(TokenType.DROP) and self._parse_drop() 5370 if drop and not isinstance(drop, exp.Command): 5371 drop.set("kind", drop.args.get("kind", "COLUMN")) 5372 return drop 5373 5374 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5375 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5376 return self.expression( 5377 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5378 ) 5379 5380 def _parse_add_constraint(self) -> exp.AddConstraint: 5381 this = None 5382 kind = self._prev.token_type 5383 5384 if kind == TokenType.CONSTRAINT: 5385 this = self._parse_id_var() 5386 5387 if self._match_text_seq("CHECK"): 5388 expression = self._parse_wrapped(self._parse_conjunction) 5389 enforced = self._match_text_seq("ENFORCED") or False 5390 5391 return self.expression( 5392 exp.AddConstraint, this=this, expression=expression, enforced=enforced 5393 ) 5394 5395 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 5396 expression = self._parse_foreign_key() 5397 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 5398 expression = self._parse_primary_key() 5399 else: 5400 expression = None 5401 5402 return self.expression(exp.AddConstraint, this=this, expression=expression) 5403 5404 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5405 index = self._index - 1 5406 5407 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 5408 return self._parse_csv(self._parse_add_constraint) 5409 5410 self._retreat(index) 5411 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5412 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5413 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5414 5415 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5416 self._match(TokenType.COLUMN) 5417 column = self._parse_field(any_token=True) 5418 5419 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5420 return self.expression(exp.AlterColumn, this=column, drop=True) 5421 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5422 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5423 if self._match(TokenType.COMMENT): 5424 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 
5425 5426 self._match_text_seq("SET", "DATA") 5427 return self.expression( 5428 exp.AlterColumn, 5429 this=column, 5430 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5431 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5432 using=self._match(TokenType.USING) and self._parse_conjunction(), 5433 ) 5434 5435 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5436 index = self._index - 1 5437 5438 partition_exists = self._parse_exists() 5439 if self._match(TokenType.PARTITION, advance=False): 5440 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5441 5442 self._retreat(index) 5443 return self._parse_csv(self._parse_drop_column) 5444 5445 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5446 if self._match(TokenType.COLUMN): 5447 exists = self._parse_exists() 5448 old_column = self._parse_column() 5449 to = self._match_text_seq("TO") 5450 new_column = self._parse_column() 5451 5452 if old_column is None or to is None or new_column is None: 5453 return None 5454 5455 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5456 5457 self._match_text_seq("TO") 5458 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5459 5460 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5461 start = self._prev 5462 5463 if not self._match(TokenType.TABLE): 5464 return self._parse_as_command(start) 5465 5466 exists = self._parse_exists() 5467 only = self._match_text_seq("ONLY") 5468 this = self._parse_table(schema=True) 5469 5470 if self._next: 5471 self._advance() 5472 5473 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5474 if parser: 5475 actions = ensure_list(parser(self)) 5476 5477 if not self._curr and actions: 5478 return self.expression( 5479 exp.AlterTable, 5480 this=this, 5481 exists=exists, 5482 actions=actions, 5483 only=only, 5484 ) 5485 5486 return self._parse_as_command(start) 5487 5488 def _parse_merge(self) -> exp.Merge: 5489 self._match(TokenType.INTO) 5490 target = self._parse_table() 5491 5492 if target and self._match(TokenType.ALIAS, advance=False): 5493 target.set("alias", self._parse_table_alias()) 5494 5495 self._match(TokenType.USING) 5496 using = self._parse_table() 5497 5498 self._match(TokenType.ON) 5499 on = self._parse_conjunction() 5500 5501 return self.expression( 5502 exp.Merge, 5503 this=target, 5504 using=using, 5505 on=on, 5506 expressions=self._parse_when_matched(), 5507 ) 5508 5509 def _parse_when_matched(self) -> t.List[exp.When]: 5510 whens = [] 5511 5512 while self._match(TokenType.WHEN): 5513 matched = not self._match(TokenType.NOT) 5514 self._match_text_seq("MATCHED") 5515 source = ( 5516 False 5517 if self._match_text_seq("BY", "TARGET") 5518 else self._match_text_seq("BY", "SOURCE") 5519 ) 5520 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5521 5522 self._match(TokenType.THEN) 5523 5524 if self._match(TokenType.INSERT): 5525 _this = self._parse_star() 5526 if _this: 5527 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5528 else: 5529 then = self.expression( 5530 exp.Insert, 5531 this=self._parse_value(), 5532 expression=self._match_text_seq("VALUES") and self._parse_value(), 5533 ) 5534 elif self._match(TokenType.UPDATE): 5535 expressions = self._parse_star() 5536 if expressions: 5537 then = self.expression(exp.Update, expressions=expressions) 5538 else: 5539 then = self.expression( 5540 exp.Update, 
5541 expressions=self._match(TokenType.SET) 5542 and self._parse_csv(self._parse_equality), 5543 ) 5544 elif self._match(TokenType.DELETE): 5545 then = self.expression(exp.Var, this=self._prev.text) 5546 else: 5547 then = None 5548 5549 whens.append( 5550 self.expression( 5551 exp.When, 5552 matched=matched, 5553 source=source, 5554 condition=condition, 5555 then=then, 5556 ) 5557 ) 5558 return whens 5559 5560 def _parse_show(self) -> t.Optional[exp.Expression]: 5561 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5562 if parser: 5563 return parser(self) 5564 return self._parse_as_command(self._prev) 5565 5566 def _parse_set_item_assignment( 5567 self, kind: t.Optional[str] = None 5568 ) -> t.Optional[exp.Expression]: 5569 index = self._index 5570 5571 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5572 return self._parse_set_transaction(global_=kind == "GLOBAL") 5573 5574 left = self._parse_primary() or self._parse_id_var() 5575 assignment_delimiter = self._match_texts(("=", "TO")) 5576 5577 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5578 self._retreat(index) 5579 return None 5580 5581 right = self._parse_statement() or self._parse_id_var() 5582 this = self.expression(exp.EQ, this=left, expression=right) 5583 5584 return self.expression(exp.SetItem, this=this, kind=kind) 5585 5586 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5587 self._match_text_seq("TRANSACTION") 5588 characteristics = self._parse_csv( 5589 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5590 ) 5591 return self.expression( 5592 exp.SetItem, 5593 expressions=characteristics, 5594 kind="TRANSACTION", 5595 **{"global": global_}, # type: ignore 5596 ) 5597 5598 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5599 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5600 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5601 5602 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5603 index = self._index 5604 set_ = self.expression( 5605 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5606 ) 5607 5608 if self._curr: 5609 self._retreat(index) 5610 return self._parse_as_command(self._prev) 5611 5612 return set_ 5613 5614 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5615 for option in options: 5616 if self._match_text_seq(*option.split(" ")): 5617 return exp.var(option) 5618 return None 5619 5620 def _parse_as_command(self, start: Token) -> exp.Command: 5621 while self._curr: 5622 self._advance() 5623 text = self._find_sql(start, self._prev) 5624 size = len(start.text) 5625 self._warn_unsupported() 5626 return exp.Command(this=text[:size], expression=text[size:]) 5627 5628 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5629 settings = [] 5630 5631 self._match_l_paren() 5632 kind = self._parse_id_var() 5633 5634 if self._match(TokenType.L_PAREN): 5635 while True: 5636 key = self._parse_id_var() 5637 value = self._parse_primary() 5638 5639 if not key and value is None: 5640 break 5641 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5642 self._match(TokenType.R_PAREN) 5643 5644 self._match_r_paren() 5645 5646 return self.expression( 5647 exp.DictProperty, 5648 this=this, 5649 kind=kind.this if kind else None, 5650 settings=settings, 5651 ) 5652 5653 def _parse_dict_range(self, this: str) -> 
exp.DictRange: 5654 self._match_l_paren() 5655 has_min = self._match_text_seq("MIN") 5656 if has_min: 5657 min = self._parse_var() or self._parse_primary() 5658 self._match_text_seq("MAX") 5659 max = self._parse_var() or self._parse_primary() 5660 else: 5661 max = self._parse_var() or self._parse_primary() 5662 min = exp.Literal.number(0) 5663 self._match_r_paren() 5664 return self.expression(exp.DictRange, this=this, min=min, max=max) 5665 5666 def _parse_comprehension( 5667 self, this: t.Optional[exp.Expression] 5668 ) -> t.Optional[exp.Comprehension]: 5669 index = self._index 5670 expression = self._parse_column() 5671 if not self._match(TokenType.IN): 5672 self._retreat(index - 1) 5673 return None 5674 iterator = self._parse_column() 5675 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5676 return self.expression( 5677 exp.Comprehension, 5678 this=this, 5679 expression=expression, 5680 iterator=iterator, 5681 condition=condition, 5682 ) 5683 5684 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 5685 if self._match(TokenType.HEREDOC_STRING): 5686 return self.expression(exp.Heredoc, this=self._prev.text) 5687 5688 if not self._match_text_seq("$"): 5689 return None 5690 5691 tags = ["$"] 5692 tag_text = None 5693 5694 if self._is_connected(): 5695 self._advance() 5696 tags.append(self._prev.text.upper()) 5697 else: 5698 self.raise_error("No closing $ found") 5699 5700 if tags[-1] != "$": 5701 if self._is_connected() and self._match_text_seq("$"): 5702 tag_text = tags[-1] 5703 tags.append("$") 5704 else: 5705 self.raise_error("No closing $ found") 5706 5707 heredoc_start = self._curr 5708 5709 while self._curr: 5710 if self._match_text_seq(*tags, advance=False): 5711 this = self._find_sql(heredoc_start, self._prev) 5712 self._advance(len(tags)) 5713 return self.expression(exp.Heredoc, this=this, tag=tag_text) 5714 5715 self._advance() 5716 5717 self.raise_error(f"No closing {''.join(tags)} found") 5718 return None 5719 5720 def _find_parser( 5721 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5722 ) -> t.Optional[t.Callable]: 5723 if not self._curr: 5724 return None 5725 5726 index = self._index 5727 this = [] 5728 while True: 5729 # The current token might be multiple words 5730 curr = self._curr.text.upper() 5731 key = curr.split(" ") 5732 this.append(curr) 5733 5734 self._advance() 5735 result, trie = in_trie(trie, key) 5736 if result == TrieResult.FAILED: 5737 break 5738 5739 if result == TrieResult.EXISTS: 5740 subparser = parsers[" ".join(this)] 5741 return subparser 5742 5743 self._retreat(index) 5744 return None 5745 5746 def _match(self, token_type, advance=True, expression=None): 5747 if not self._curr: 5748 return None 5749 5750 if self._curr.token_type == token_type: 5751 if advance: 5752 self._advance() 5753 self._add_comments(expression) 5754 return True 5755 5756 return None 5757 5758 def _match_set(self, types, advance=True): 5759 if not self._curr: 5760 return None 5761 5762 if self._curr.token_type in types: 5763 if advance: 5764 self._advance() 5765 return True 5766 5767 return None 5768 5769 def _match_pair(self, token_type_a, token_type_b, advance=True): 5770 if not self._curr or not self._next: 5771 return None 5772 5773 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5774 if advance: 5775 self._advance(2) 5776 return True 5777 5778 return None 5779 5780 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5781 if not self._match(TokenType.L_PAREN, 
expression=expression): 5782 self.raise_error("Expecting (") 5783 5784 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5785 if not self._match(TokenType.R_PAREN, expression=expression): 5786 self.raise_error("Expecting )") 5787 5788 def _match_texts(self, texts, advance=True): 5789 if self._curr and self._curr.text.upper() in texts: 5790 if advance: 5791 self._advance() 5792 return True 5793 return None 5794 5795 def _match_text_seq(self, *texts, advance=True): 5796 index = self._index 5797 for text in texts: 5798 if self._curr and self._curr.text.upper() == text: 5799 self._advance() 5800 else: 5801 self._retreat(index) 5802 return None 5803 5804 if not advance: 5805 self._retreat(index) 5806 5807 return True 5808 5809 @t.overload 5810 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5811 ... 5812 5813 @t.overload 5814 def _replace_columns_with_dots( 5815 self, this: t.Optional[exp.Expression] 5816 ) -> t.Optional[exp.Expression]: 5817 ... 5818 5819 def _replace_columns_with_dots(self, this): 5820 if isinstance(this, exp.Dot): 5821 exp.replace_children(this, self._replace_columns_with_dots) 5822 elif isinstance(this, exp.Column): 5823 exp.replace_children(this, self._replace_columns_with_dots) 5824 table = this.args.get("table") 5825 this = ( 5826 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5827 ) 5828 5829 return this 5830 5831 def _replace_lambda( 5832 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5833 ) -> t.Optional[exp.Expression]: 5834 if not node: 5835 return node 5836 5837 for column in node.find_all(exp.Column): 5838 if column.parts[0].name in lambda_variables: 5839 dot_or_id = column.to_dot() if column.table else column.this 5840 parent = column.parent 5841 5842 while isinstance(parent, exp.Dot): 5843 if not isinstance(parent.parent, exp.Dot): 5844 parent.replace(dot_or_id) 5845 break 5846 parent = parent.parent 5847 else: 5848 if column is node: 5849 node = dot_or_id 5850 else: 5851 column.replace(dot_or_id) 5852 return node
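The DECODE-to-CASE rewrite documented in _parse_decode above is easiest to observe through the public sqlglot.parse_one helper, which routes DECODE calls with three or more arguments through that method. A minimal sketch (the printed SQL is indicative):

    import sqlglot

    # DECODE(expression, search, result, default) is parsed into a CASE
    # expression, so generating SQL from the tree makes the rewrite visible.
    tree = sqlglot.parse_one("SELECT DECODE(x, 1, 'one', 'other') FROM t")
    print(tree.sql())
    # SELECT CASE WHEN x = 1 THEN 'one' ELSE 'other' END FROM t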
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
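For instance, a parser that accumulates errors and raises them together in a single ParseError, rather than raising on the first one, can be built from the documented constructor arguments (a minimal sketch; the dialect name is only an example):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    # Collect up to 5 errors and raise them together once parsing finishes,
    # instead of raising immediately (the IMMEDIATE default).
    parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5, dialect="duckdb")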
1035 def __init__( 1036 self, 1037 error_level: t.Optional[ErrorLevel] = None, 1038 error_message_context: int = 100, 1039 max_errors: int = 3, 1040 dialect: DialectType = None, 1041 ): 1042 from sqlglot.dialects import Dialect 1043 1044 self.error_level = error_level or ErrorLevel.IMMEDIATE 1045 self.error_message_context = error_message_context 1046 self.max_errors = max_errors 1047 self.dialect = Dialect.get_or_raise(dialect) 1048 self.reset()
1060 def parse( 1061 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1062 ) -> t.List[t.Optional[exp.Expression]]: 1063 """ 1064 Parses a list of tokens and returns a list of syntax trees, one tree 1065 per parsed SQL statement. 1066 1067 Args: 1068 raw_tokens: The list of tokens. 1069 sql: The original SQL string, used to produce helpful debug messages. 1070 1071 Returns: 1072 The list of the produced syntax trees. 1073 """ 1074 return self._parse( 1075 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1076 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
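In practice the token list comes from the Tokenizer, and passing the original SQL string enables the contextual error messages built by raise_error. A minimal usage sketch:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t; SELECT b FROM u"
    tokens = Tokenizer().tokenize(sql)
    trees = Parser().parse(tokens, sql=sql)  # one syntax tree per statement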
1078 def parse_into( 1079 self, 1080 expression_types: exp.IntoType, 1081 raw_tokens: t.List[Token], 1082 sql: t.Optional[str] = None, 1083 ) -> t.List[t.Optional[exp.Expression]]: 1084 """ 1085 Parses a list of tokens into a given Expression type. If a collection of Expression 1086 types is given instead, this method will try to parse the token list into each one 1087 of them, stopping at the first for which the parsing succeeds. 1088 1089 Args: 1090 expression_types: The expression type(s) to try and parse the token list into. 1091 raw_tokens: The list of tokens. 1092 sql: The original SQL string, used to produce helpful debug messages. 1093 1094 Returns: 1095 The target Expression. 1096 """ 1097 errors = [] 1098 for expression_type in ensure_list(expression_types): 1099 parser = self.EXPRESSION_PARSERS.get(expression_type) 1100 if not parser: 1101 raise TypeError(f"No parser registered for {expression_type}") 1102 1103 try: 1104 return self._parse(parser, raw_tokens, sql) 1105 except ParseError as e: 1106 e.errors[0]["into_expression"] = expression_type 1107 errors.append(e) 1108 1109 raise ParseError( 1110 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1111 errors=merge_errors(errors), 1112 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
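The fallback behavior can be sketched as follows: each expression type is looked up in EXPRESSION_PARSERS and tried in order, and the first type that parses the tokens wins (a sketch; since "x > 0" is not a valid SELECT, the Condition parser succeeds):

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "x > 0"
    tokens = Tokenizer().tokenize(sql)
    # exp.Select fails on this input, so parse_into falls through to exp.Condition.
    tree = Parser().parse_into((exp.Select, exp.Condition), tokens, sql=sql)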
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
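Under ErrorLevel.WARN, for example, a malformed statement is logged through logger.error and parsing continues, while under RAISE the accumulated errors would be bundled into a single ParseError here instead. A sketch (the exact error text is indicative):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1 +"  # dangling operator: the Add node is missing its right-hand side
    parser = Parser(error_level=ErrorLevel.WARN)
    parser.parse(Tokenizer().tokenize(sql), sql=sql)  # logs the error instead of raising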
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
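The structured fields attached to the ParseError can be inspected by callers, which is useful when surfacing errors in tooling. A sketch under the default IMMEDIATE error level:

    from sqlglot.errors import ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1 +"
    try:
        Parser().parse(Tokenizer().tokenize(sql), sql=sql)
    except ParseError as e:
        # Each recorded error carries the context assembled in raise_error.
        err = e.errors[0]
        print(err["line"], err["col"], err["description"])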
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
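A sketch of building a node through this helper rather than instantiating the expression class directly, so that validation and comment handling are applied:

    from sqlglot import exp
    from sqlglot.parser import Parser

    parser = Parser()
    # Equivalent to exp.Alias(...) plus validation; a missing mandatory argument
    # would be reported through raise_error at the parser's error level.
    alias = parser.expression(exp.Alias, this=exp.column("a"), alias=exp.to_identifier("b"))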
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
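Validation failures are routed through raise_error, so under the default IMMEDIATE error level an incomplete node raises right away. A sketch (exp.Not is just a convenient example of a node with a mandatory argument):

    from sqlglot import exp
    from sqlglot.parser import Parser

    parser = Parser()  # ErrorLevel.IMMEDIATE by default
    # exp.Not requires its `this` argument, so validating an empty instance
    # raises a ParseError immediately.
    parser.validate_expression(exp.Not())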